diff --git a/.Rbuildignore b/.Rbuildignore index 03f1b66..eb0b303 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -22,3 +22,18 @@ Icon? ^\.lintr$ ^revdep_manual$ ^vignettes/.*_cache$ +^benchmark$ +^CSX2026\.pdf$ +^Efficient_DiD\.pdf$ +^Rplots\.pdf$ +^CODEX_AUDIT_HANDOFF\.md$ +^EDID_implemention\.md$ +^IMPLEMENTATION_PLAN\.md$ +^METHODOLOGY_REVIEW\.md$ +^audit\.md$ +^audit_cov\.md$ +^comprehension\.md$ +^implementation\.md$ +^mailbox\.md$ +^spec\.md$ +^test-spec\.md$ diff --git a/.gitignore b/.gitignore index da888ab..eabd0a0 100644 --- a/.gitignore +++ b/.gitignore @@ -13,8 +13,26 @@ desktop.ini /Meta/ did.Rproj ..Rcheck/ +*.Rcheck/ +*.tar.gz .claude/ CLAUDE.md .vscode/ .revdep_manual/ -vignettes/*_cache/ \ No newline at end of file +vignettes/*_cache/ + +# Personal working files (not part of the package) +CSX2026.pdf +Efficient_DiD.pdf +Rplots.pdf +CODEX_AUDIT_HANDOFF.md +EDID_implemention.md +IMPLEMENTATION_PLAN.md +METHODOLOGY_REVIEW.md +audit.md +audit_cov.md +comprehension.md +implementation.md +mailbox.md +spec.md +test-spec.md \ No newline at end of file diff --git a/NAMESPACE b/NAMESPACE index 969f75f..2db2ec6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,7 @@ # Generated by roxygen2: do not edit by hand +S3method(as.data.frame,edid_fit) +S3method(coef,edid_fit) S3method(ggdid,AGGTEobj) S3method(ggdid,MP) S3method(glance,AGGTEobj) @@ -7,23 +9,30 @@ S3method(glance,MP) S3method(nobs,AGGTEobj) S3method(nobs,MP) S3method(print,AGGTEobj) +S3method(print,AGGTEobj_edid) S3method(print,MP) +S3method(print,edid_fit) S3method(summary,AGGTEobj) +S3method(summary,AGGTEobj_edid) S3method(summary,MP) S3method(summary,MP.TEST) +S3method(summary,edid_fit) S3method(tidy,AGGTEobj) S3method(tidy,MP) +S3method(vcov,edid_fit) export(AGGTEobj) export(DIDparams) export(MP) export(MP.TEST) export(aggte) +export(aggte_edid) export(att_gt) export(build_sim_dataset) export(compute.aggte) export(compute.att_gt) export(compute.att_gt2) export(conditional_did_pretest) +export(edid) export(ggdid) export(glance) export(gplot) @@ -59,6 +68,7 @@ importFrom(generics,tidy) importFrom(methods,as) importFrom(methods,is) importFrom(stats,aggregate) +importFrom(stats,as.formula) importFrom(stats,binomial) importFrom(stats,complete.cases) importFrom(stats,cov) @@ -74,6 +84,7 @@ importFrom(stats,predict) importFrom(stats,qnorm) importFrom(stats,quantile) importFrom(stats,rnorm) +importFrom(stats,sd) importFrom(stats,setNames) importFrom(stats,var) importFrom(tidyr,gather) diff --git a/NEWS.md b/NEWS.md index aa6537c..88f191a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # did 2.3.1.904 + * Added `edid()`: efficient DiD estimator (Chen, Sant'Anna & Xie 2025) supporting PT-All and PT-Post parallel trends assumptions, analytical EIF-based standard errors (iid and cluster-robust), multiplier bootstrap inference (Rademacher, Mammen, Webb), and overall/event-study/group aggregation with WIF correction. + * Fixed bug where `faster_mode = TRUE` and `faster_mode = FALSE` produced different ATT estimates when sampling weights (`weightsname`) vary across time. The fast path was always using first-period weights; it now correctly uses the same period's weights as the slow path * New `fix_weights` argument in `att_gt()` gives users explicit control over how time-varying sampling weights are resolved in each 2x2 DiD comparison. Options: `NULL` (default, preserves existing behavior), `"varying"` (per-observation weights using RC estimators), `"base_period"` (fix at g-1 for all cells), `"first_period"` (fix at first period). See `?att_gt` for details diff --git a/R/edid-aggregate.R b/R/edid-aggregate.R new file mode 100644 index 0000000..6b55201 --- /dev/null +++ b/R/edid-aggregate.R @@ -0,0 +1,450 @@ +# edid-aggregate.R +# Aggregation functions for the EDiD estimator: +# aggregate_overall_edid() +# aggregate_event_study_edid() +# aggregate_group_edid() +# compute_wif_contribution_edid() + +# --------------------------------------------------------------------------- +# Internal helper: pull EIF column from eif_matrix for a given cell_id +# --------------------------------------------------------------------------- +.eif_col <- function(eif_matrix, cell_id) { + if (is.null(eif_matrix)) return(NULL) + eif_matrix[, cell_id, drop = TRUE] +} + +# --------------------------------------------------------------------------- +# WIF correction +# --------------------------------------------------------------------------- + +#' Compute WIF (weight influence function) correction for aggregated EIF +#' +#' Corrects the aggregated EIF for estimation error in the cohort-share weights +#' \eqn{\pi_g = n_g / n}. The correction accounts for the fact that \eqn{\pi_g} +#' is estimated from data and therefore contributes to the variance of the +#' aggregated estimator. +#' +#' @param weight_fn function(cells, cell_index, panel_obj) that returns a named +#' numeric vector of normalized weights \eqn{q_k} for all post-treatment cells; +#' names are cell_ids (character) +#' @param cells list of \code{edid_cell_result} objects +#' @param eif_matrix n x n_cells numeric matrix (or NULL) +#' @param cell_index data.frame with columns \code{group}, \code{time}, +#' \code{cell_id}, \code{is_pre} +#' @param panel_obj panel object from \code{prepare_edid_panel()} +#' @param agg_att scalar: the already-computed aggregated ATT (used for WIF) +#' +#' @return numeric vector length n: WIF correction to add to aggregated EIF +#' @keywords internal +compute_wif_contribution_edid <- function( + weight_fn, cells, eif_matrix, cell_index, panel_obj, agg_att +) { + n <- panel_obj$n + wif <- numeric(n) + + if (is.null(eif_matrix)) return(wif) + + # Obtain normalized weights for all post-treatment cells + q_vec <- weight_fn(cells, cell_index, panel_obj) # named numeric + + # Get post-treatment cell ids and their ATTs + post_ci <- cell_index[!cell_index$is_pre, , drop = FALSE] + post_ids <- as.character(post_ci$cell_id) + post_atts <- vapply(post_ids, function(cid) { + idx <- which(post_ci$cell_id == as.integer(cid)) + cells[[post_ci$cell_id[idx]]]$att + }, numeric(1L)) + + # For each cohort g, compute the WIF contribution from pi_g uncertainty. + # WIF_i = sum_k [ (att_k - agg_att) * d(q_k)/d(pi_g_k) * d(pi_g_k)/dI(G_i=g_k) ] + # With q_k = pi_{g_k} / sum_{k'} pi_{g_{k'}} : + # d(q_k)/d(pi_g) = (I(g_k==g) * S - pi_{g_k}) / S^2 + # where S = sum_{k' in post} pi_{g_{k'}} + # d(pi_g)/dI(G_i=g) = (1/n) * (1 - I(G_i=g) * n_g_hat / n_g) ... simplified below + + S <- sum(q_vec[post_ids] * (q_vec[post_ids] > 0)) # sum of unnormalized weights + # Actually q_vec already gives normalized weights. Rebuild unnorm (pi_g_k): + # unnorm_k = pi_{g_k}; norm_k = pi_{g_k} / S_unnorm + # We need: d(q_k_norm)/d(pi_{g}) = (I(g_k==g)*S_u - pi_{g_k}) / S_u^2 + # where S_u = sum pi_{g_{k'}}. + + # Collect pi_{g_k} for each post cell + pi_by_cell <- vapply(post_ids, function(cid) { + g_val <- post_ci$group[post_ci$cell_id == as.integer(cid)] + panel_obj$cohort_fractions[[as.character(g_val)]] + }, numeric(1L)) + S_u <- sum(pi_by_cell) # sum of unnormalized (cohort share) weights + + if (S_u < EDID_DENOM_EPS) return(wif) + + for (g in panel_obj$treatment_groups) { + mask_g <- panel_obj$cohort_masks[[as.character(g)]] + n_g <- sum(mask_g) + if (n_g == 0L) next + pi_g <- panel_obj$cohort_fractions[[as.character(g)]] + + # Cells belonging to cohort g (among post cells) + g_post_mask <- post_ci$group == g + if (!any(g_post_mask)) next + + # sum_{k: g_k=g} (att_k - agg_att) * (S_u - pi_g * K_g) / S_u^2 + # where K_g = number of post cells for cohort g + # More precisely, the partial derivative for cohort g: + # dq_k/d(pi_g) = (I(g_k==g)*S_u - pi_g * sum_all 1) / S_u^2 + # But S_u = sum_k pi_{g_k}, so d(S_u)/d(pi_g) = K_g (# cells with g_k=g) + K_g <- sum(g_post_mask) + g_ids <- post_ids[g_post_mask] + g_atts <- post_atts[g_post_mask] + + # For each post cell k: + # if g_k == g: dq_k/d(pi_g) = (S_u - pi_g * K_g) / S_u^2 + 0 ... simplified: + # dq_k/d(pi_g) = (I(g_k==g) * S_u - pi_g * K_g) / S_u^2 + # if g_k != g: dq_k/d(pi_g) = -pi_{g_k} * K_g / S_u^2 + # Then d(agg_att)/d(pi_g) = sum_k att_k * dq_k/d(pi_g) + # And WIF contribution for unit i in cohort g: + # d(agg_att)/d(pi_g) * d(pi_g)/d(I(G_i=g)) = d(agg_att)/d(pi_g) * (1 - pi_g * K_g / K_g?) ... + # d(pi_g)/d(I(G_i=g)) = 1/n (adding one unit to cohort g increases pi_g by 1/n) + + # Contribution from cells in cohort g (g_k == g): + d_agg_d_pig_from_g <- sum(g_atts) * (S_u - pi_g * K_g) / (S_u^2) + # Contribution from cells NOT in cohort g (g_k != g): + not_g_mask <- !g_post_mask + if (any(not_g_mask)) { + not_g_atts <- post_atts[not_g_mask] + not_g_pi <- pi_by_cell[not_g_mask] + d_agg_d_pig_from_notg <- sum(not_g_atts * (-not_g_pi * K_g) / (S_u^2)) + } else { + d_agg_d_pig_from_notg <- 0 + } + d_agg_d_pig <- d_agg_d_pig_from_g + d_agg_d_pig_from_notg + + # Units in cohort g: d(pi_g)/dI(G_i=g) = 1/n + wif[mask_g] <- wif[mask_g] + d_agg_d_pig * (1 / n) + + # Units outside cohort g also have a contribution via d(pi_g)/dI(G_i=g): + # d(pi_g)/dI(G_i=g) for i not in g = -pi_g / (n * (1 - pi_g))... but + # using the simpler sample-level: d(n_g/n)/dI(G_i not g) = 0 exactly. + # (Adding unit i outside cohort g does not change n_g.) So no contribution + # for units outside cohort g from this cohort's weight derivative. Correct. + } + + wif +} + +# --------------------------------------------------------------------------- +# Overall ATT aggregation +# --------------------------------------------------------------------------- + +#' Aggregate cell-level ATTs into an overall ATT +#' +#' Uses cohort-share weights \eqn{q_k = \pi_{g_k}} over post-treatment cells, +#' normalized to sum to 1. Includes WIF correction for estimated weights. +#' +#' @param cells list of \code{edid_cell_result} objects (ordered by cell_id) +#' @param eif_matrix n x n_cells numeric matrix (or NULL if EIF not stored) +#' @param cell_index data.frame with columns \code{group}, \code{time}, +#' \code{cell_id}, \code{is_pre} +#' @param panel_obj panel object +#' @param alpha significance level +#' +#' @return named list: \code{att}, \code{se}, \code{ci_lower}, \code{ci_upper}, +#' \code{t_stat}, \code{p_value}, \code{eif_agg} +#' @keywords internal +aggregate_overall_edid <- function(cells, eif_matrix, cell_index, panel_obj, alpha) { + + # Post-treatment cells only + post_ci <- cell_index[!cell_index$is_pre, , drop = FALSE] + post_ids <- post_ci$cell_id # integer indices into cells list + + # Filter to cells with valid (non-NA) ATT + valid_mask <- vapply(post_ids, function(cid) { + !is.null(cells[[cid]]$att) && is.finite(cells[[cid]]$att) + }, logical(1L)) + + if (!any(valid_mask)) { + return(list(att = NA_real_, se = NA_real_, ci_lower = NA_real_, + ci_upper = NA_real_, t_stat = NA_real_, p_value = NA_real_, + eif_agg = NULL)) + } + + post_ci_v <- post_ci[valid_mask, , drop = FALSE] + post_ids_v <- post_ci_v$cell_id + + # Cohort-share weights (unnormalized) + pi_g_k <- vapply(post_ids_v, function(cid) { + panel_obj$cohort_fractions[[as.character(post_ci_v$group[post_ci_v$cell_id == cid])]] + }, numeric(1L)) + S_u <- sum(pi_g_k) + if (S_u < EDID_DENOM_EPS) { + return(list(att = NA_real_, se = NA_real_, ci_lower = NA_real_, + ci_upper = NA_real_, t_stat = NA_real_, p_value = NA_real_, + eif_agg = NULL)) + } + q_norm <- pi_g_k / S_u + + # Point estimate + att_k <- vapply(post_ids_v, function(cid) cells[[cid]]$att, numeric(1L)) + overall_att <- sum(q_norm * att_k) + + # Aggregated EIF (direct contribution) + eif_agg <- NULL + if (!is.null(eif_matrix)) { + n <- panel_obj$n + eif_agg <- numeric(n) + for (ii in seq_along(post_ids_v)) { + cid <- post_ids_v[ii] + eif_c <- .eif_col(eif_matrix, cid) + if (!is.null(eif_c)) { + eif_agg <- eif_agg + q_norm[ii] * eif_c + } + } + + # WIF correction: weight function for overall ATT + weight_fn_overall <- function(cells_arg, cell_index_arg, panel_obj_arg) { + ci_post <- cell_index_arg[!cell_index_arg$is_pre, , drop = FALSE] + ids <- ci_post$cell_id + valid <- vapply(ids, function(cid) { + !is.null(cells_arg[[cid]]$att) && is.finite(cells_arg[[cid]]$att) + }, logical(1L)) + ids_v <- ids[valid] + ci_v <- ci_post[valid, , drop = FALSE] + pg <- vapply(ids_v, function(cid) { + panel_obj_arg$cohort_fractions[[as.character( + ci_v$group[ci_v$cell_id == cid])]] + }, numeric(1L)) + Su <- sum(pg) + if (Su < EDID_DENOM_EPS) { + return(stats::setNames(rep(0, length(ids_v)), as.character(ids_v))) + } + stats::setNames(pg / Su, as.character(ids_v)) + } + + wif <- compute_wif_contribution_edid( + weight_fn_overall, cells, eif_matrix, cell_index, panel_obj, overall_att + ) + eif_agg <- eif_agg + wif + } + + # SE and inference + inf_res <- safe_inference_edid( + eif_agg, panel_obj$cluster_indices, alpha, overall_att + ) + + list( + att = overall_att, + se = inf_res$se, + ci_lower = inf_res$ci_lower, + ci_upper = inf_res$ci_upper, + t_stat = inf_res$t_stat, + p_value = inf_res$p_value, + eif_agg = eif_agg + ) +} + +# --------------------------------------------------------------------------- +# Event-study aggregation +# --------------------------------------------------------------------------- + +#' Aggregate cell-level ATTs by relative time (event study) +#' +#' For each unique relative time \eqn{e = t - g}, computes the cohort-share- +#' weighted average ATT over all \code{(g, t)} cells with \eqn{t - g = e}. +#' Includes WIF correction. +#' +#' @param cells list of \code{edid_cell_result} objects +#' @param eif_matrix n x n_cells numeric matrix (or NULL) +#' @param cell_index data.frame with columns \code{group}, \code{time}, +#' \code{cell_id}, \code{is_pre} +#' @param panel_obj panel object +#' @param alpha significance level +#' @param balance_e integer or NULL: if not NULL, restrict to \code{[-balance_e, balance_e]} +#' +#' @return named list, one entry per unique relative time; each entry is a list +#' with \code{e}, \code{att}, \code{se}, \code{ci_lower}, \code{ci_upper}, +#' \code{t_stat}, \code{p_value}, \code{eif_agg} +#' @keywords internal +aggregate_event_study_edid <- function( + cells, eif_matrix, cell_index, panel_obj, alpha, balance_e = NULL +) { + # Compute relative times + cell_index$e <- cell_index$time - cell_index$group + + # Apply balance_e restriction + if (!is.null(balance_e)) { + cell_index <- cell_index[abs(cell_index$e) <= balance_e, , drop = FALSE] + } + + unique_e <- sort(unique(cell_index$e)) + result <- vector("list", length(unique_e)) + names(result) <- as.character(unique_e) + + n <- panel_obj$n + + for (ii in seq_along(unique_e)) { + e_val <- unique_e[ii] + e_mask <- cell_index$e == e_val + e_ci <- cell_index[e_mask, , drop = FALSE] + e_ids <- e_ci$cell_id + + # Valid cells + valid <- vapply(e_ids, function(cid) { + !is.null(cells[[cid]]$att) && is.finite(cells[[cid]]$att) + }, logical(1L)) + + na_entry <- list(e = e_val, att = NA_real_, se = NA_real_, + ci_lower = NA_real_, ci_upper = NA_real_, + t_stat = NA_real_, p_value = NA_real_, eif_agg = NULL) + if (!any(valid)) { + result[[ii]] <- na_entry + next + } + + e_ci_v <- e_ci[valid, , drop = FALSE] + e_ids_v <- e_ci_v$cell_id + + pi_g_k <- vapply(seq_along(e_ids_v), function(jj) { + panel_obj$cohort_fractions[[as.character(e_ci_v$group[jj])]] + }, numeric(1L)) + S_u <- sum(pi_g_k) + if (S_u < EDID_DENOM_EPS) { result[[ii]] <- na_entry; next } + q_norm <- pi_g_k / S_u + + att_k <- vapply(e_ids_v, function(cid) cells[[cid]]$att, numeric(1L)) + es_att <- sum(q_norm * att_k) + + eif_agg <- NULL + if (!is.null(eif_matrix)) { + eif_agg <- numeric(n) + for (jj in seq_along(e_ids_v)) { + cid <- e_ids_v[jj] + eif_c <- .eif_col(eif_matrix, cid) + if (!is.null(eif_c)) eif_agg <- eif_agg + q_norm[jj] * eif_c + } + + # Build a cell_index restricted to this e-group for WIF + # WIF weight function: cohort-share weights restricted to this e + e_cell_index_restricted <- cell_index + # Mark non-e cells as pre so weight_fn ignores them + e_cell_index_restricted$is_pre <- TRUE + e_cell_index_restricted$is_pre[e_mask] <- e_ci$is_pre + + weight_fn_e <- .make_weight_fn_es(e_val) + wif <- compute_wif_contribution_edid( + weight_fn_e, cells, eif_matrix, e_cell_index_restricted, panel_obj, es_att + ) + eif_agg <- eif_agg + wif + } + + inf_res <- safe_inference_edid(eif_agg, panel_obj$cluster_indices, alpha, es_att) + + result[[ii]] <- list( + e = e_val, + att = es_att, + se = inf_res$se, + ci_lower = inf_res$ci_lower, + ci_upper = inf_res$ci_upper, + t_stat = inf_res$t_stat, + p_value = inf_res$p_value, + eif_agg = eif_agg + ) + } + result +} + +# Helper: build weight function for event-study relative time e +.make_weight_fn_es <- function(e_val) { + force(e_val) + function(cells_arg, cell_index_arg, panel_obj_arg) { + ci <- cell_index_arg + ci$e_local <- ci$time - ci$group + e_mask_local <- ci$e_local == e_val & !ci$is_pre + ids_v <- ci$cell_id[e_mask_local] + valid <- vapply(ids_v, function(cid) { + !is.null(cells_arg[[cid]]$att) && is.finite(cells_arg[[cid]]$att) + }, logical(1L)) + ids_v <- ids_v[valid] + ci_v <- ci[e_mask_local, , drop = FALSE][valid, , drop = FALSE] + pg <- vapply(seq_along(ids_v), function(jj) { + panel_obj_arg$cohort_fractions[[as.character(ci_v$group[jj])]] + }, numeric(1L)) + Su <- sum(pg) + if (Su < EDID_DENOM_EPS) { + return(stats::setNames(rep(0, length(ids_v)), as.character(ids_v))) + } + stats::setNames(pg / Su, as.character(ids_v)) + } +} + +# --------------------------------------------------------------------------- +# Group aggregation +# --------------------------------------------------------------------------- + +#' Aggregate cell-level ATTs by treatment cohort +#' +#' For each cohort \code{g}, computes the equal-time-weighted average ATT over +#' all post-treatment cells \code{(g, t)} with \code{t >= g}. +#' Group aggregation uses equal weights so there is no WIF correction. +#' +#' @param cells list of \code{edid_cell_result} objects +#' @param eif_matrix n x n_cells numeric matrix (or NULL) +#' @param cell_index data.frame with columns \code{group}, \code{time}, +#' \code{cell_id}, \code{is_pre} +#' @param panel_obj panel object +#' @param alpha significance level +#' +#' @return named list, one entry per cohort; each entry is a list with +#' \code{group}, \code{att}, \code{se}, \code{ci_lower}, \code{ci_upper}, +#' \code{t_stat}, \code{p_value}, \code{eif_agg} +#' @keywords internal +aggregate_group_edid <- function(cells, eif_matrix, cell_index, panel_obj, alpha) { + + tgroups <- panel_obj$treatment_groups + result <- vector("list", length(tgroups)) + names(result) <- as.character(tgroups) + n <- panel_obj$n + + for (ii in seq_along(tgroups)) { + g_val <- tgroups[ii] + g_mask <- cell_index$group == g_val & !cell_index$is_pre + g_ci <- cell_index[g_mask, , drop = FALSE] + g_ids <- g_ci$cell_id + + na_entry <- list(group = g_val, att = NA_real_, se = NA_real_, + ci_lower = NA_real_, ci_upper = NA_real_, + t_stat = NA_real_, p_value = NA_real_, eif_agg = NULL) + + valid <- vapply(g_ids, function(cid) { + !is.null(cells[[cid]]$att) && is.finite(cells[[cid]]$att) + }, logical(1L)) + if (!any(valid)) { result[[ii]] <- na_entry; next } + + g_ids_v <- g_ids[valid] + m_g <- length(g_ids_v) + att_k <- vapply(g_ids_v, function(cid) cells[[cid]]$att, numeric(1L)) + group_att <- mean(att_k) # equal weights + + eif_agg <- NULL + if (!is.null(eif_matrix)) { + eif_agg <- numeric(n) + for (cid in g_ids_v) { + eif_c <- .eif_col(eif_matrix, cid) + if (!is.null(eif_c)) eif_agg <- eif_agg + eif_c / m_g + } + # Equal weights: no WIF correction needed (weights don't depend on pi_g) + } + + inf_res <- safe_inference_edid(eif_agg, panel_obj$cluster_indices, alpha, group_att) + + result[[ii]] <- list( + group = g_val, + att = group_att, + se = inf_res$se, + ci_lower = inf_res$ci_lower, + ci_upper = inf_res$ci_upper, + t_stat = inf_res$t_stat, + p_value = inf_res$p_value, + eif_agg = eif_agg + ) + } + result +} diff --git a/R/edid-aggte.R b/R/edid-aggte.R new file mode 100644 index 0000000..f564b98 --- /dev/null +++ b/R/edid-aggte.R @@ -0,0 +1,270 @@ +# edid-aggte.R +# Aggregation function for edid_fit objects, mirroring the aggte() interface. + +#' Aggregate edid_fit estimates +#' +#' Provides the same user-facing interface as \code{\link[did]{aggte}} but +#' accepts an \code{edid_fit} object produced by \code{\link{edid}}. +#' +#' @param edid_fit_obj An \code{edid_fit} object returned by \code{edid()}. +#' @param type Character scalar: aggregation type. One of +#' \code{"simple"} (overall ATT), \code{"dynamic"} (event-study), +#' \code{"group"} (cohort-level ATT), or \code{"calendar"} (not implemented). +#' @param balance_e Integer or \code{NULL}: if not \code{NULL}, restricts the +#' dynamic aggregation to relative times in +#' \eqn{[-\text{balance\_e}, \text{balance\_e}]}. +#' @param min_e Numeric: minimum relative time to include in dynamic output. +#' Default \code{-Inf}. +#' @param max_e Numeric: maximum relative time to include in dynamic output. +#' Default \code{Inf}. +#' @param na.rm Logical: whether to remove NA ATT entries before aggregating. +#' Default \code{FALSE}. +#' +#' @return An S3 object of class \code{c("AGGTEobj_edid", "list")} with fields +#' matching \code{AGGTEobj} where possible: +#' \describe{ +#' \item{\code{att.egt}}{Vector of ATT estimates for each index.} +#' \item{\code{se.egt}}{Vector of standard errors.} +#' \item{\code{egt}}{Vector of indices (relative time, group, etc.).} +#' \item{\code{type}}{The aggregation type string.} +#' \item{\code{overall.att}}{Scalar overall ATT.} +#' \item{\code{overall.se}}{Scalar overall SE.} +#' \item{\code{alp}}{Significance level used.} +#' \item{\code{call}}{The matched call.} +#' } +#' +#' @seealso \code{\link{edid}}, \code{\link[did]{aggte}} +#' @export +aggte_edid <- function( + edid_fit_obj, + type = c("simple", "dynamic", "group", "calendar"), + balance_e = NULL, + min_e = -Inf, + max_e = Inf, + na.rm = FALSE +) { + mc <- match.call() + type <- match.arg(type) + + if (!inherits(edid_fit_obj, "edid_fit")) { + stop("`edid_fit_obj` must be an object of class `edid_fit` returned by edid().") + } + + alp <- edid_fit_obj$alpha + + # Helper: extract overall ATT + SE from the edid_fit object + .get_overall <- function(obj) { + ov <- obj$overall + att <- if (!is.null(ov)) ov$att else NA_real_ + se <- if (!is.null(ov)) ov$se else NA_real_ + list(att = att, se = se) + } + + if (type == "calendar") { + stop("aggte_edid() does not support type = \"calendar\". ", + "edid() does not compute calendar-time treatment effects.") + } + + # ------------------------------------------------------------------ + # simple: return overall ATT already computed + # ------------------------------------------------------------------ + if (type == "simple") { + ov_info <- .get_overall(edid_fit_obj) + out <- list( + att.egt = ov_info$att, + se.egt = ov_info$se, + egt = NA_real_, + type = type, + overall.att = ov_info$att, + overall.se = ov_info$se, + alp = alp, + call = mc + ) + class(out) <- c("AGGTEobj_edid", "list") + return(out) + } + + # ------------------------------------------------------------------ + # dynamic: event-study, filter by min_e / max_e / balance_e + # ------------------------------------------------------------------ + if (type == "dynamic") { + es_list <- edid_fit_obj$event_study + if (is.null(es_list) || length(es_list) == 0L) { + stop("No event-study results in edid_fit_obj. ", + "Re-run edid() with aggregate = \"event_study\" or \"all\".") + } + + # Convert to flat vectors + e_vals <- vapply(es_list, function(x) x$e, numeric(1L)) + att_vec <- vapply(es_list, function(x) x$att, numeric(1L)) + se_vec <- vapply(es_list, function(x) x$se, numeric(1L)) + + # Apply balance_e filter first (symmetric window) + if (!is.null(balance_e)) { + keep_bal <- (e_vals >= -balance_e) & (e_vals <= balance_e) + e_vals <- e_vals[keep_bal] + att_vec <- att_vec[keep_bal] + se_vec <- se_vec[keep_bal] + } + + # Apply min_e / max_e filter + keep_range <- (e_vals >= min_e) & (e_vals <= max_e) + e_vals <- e_vals[keep_range] + att_vec <- att_vec[keep_range] + se_vec <- se_vec[keep_range] + + # Optionally drop NAs + if (na.rm) { + keep_na <- !is.na(att_vec) + e_vals <- e_vals[keep_na] + att_vec <- att_vec[keep_na] + se_vec <- se_vec[keep_na] + } + + ov_info <- .get_overall(edid_fit_obj) + out <- list( + att.egt = att_vec, + se.egt = se_vec, + egt = e_vals, + type = type, + overall.att = ov_info$att, + overall.se = ov_info$se, + alp = alp, + call = mc + ) + class(out) <- c("AGGTEobj_edid", "list") + return(out) + } + + # ------------------------------------------------------------------ + # group: cohort-level ATTs + # ------------------------------------------------------------------ + if (type == "group") { + gr_list <- edid_fit_obj$group + if (is.null(gr_list) || length(gr_list) == 0L) { + stop("No group results in edid_fit_obj. ", + "Re-run edid() with aggregate = \"group\" or \"all\".") + } + + g_vals <- vapply(gr_list, function(x) x$group, numeric(1L)) + att_vec <- vapply(gr_list, function(x) x$att, numeric(1L)) + se_vec <- vapply(gr_list, function(x) x$se, numeric(1L)) + + if (na.rm) { + keep_na <- !is.na(att_vec) + g_vals <- g_vals[keep_na] + att_vec <- att_vec[keep_na] + se_vec <- se_vec[keep_na] + } + + ov_info <- .get_overall(edid_fit_obj) + out <- list( + att.egt = att_vec, + se.egt = se_vec, + egt = g_vals, + type = type, + overall.att = ov_info$att, + overall.se = ov_info$se, + alp = alp, + call = mc + ) + class(out) <- c("AGGTEobj_edid", "list") + return(out) + } +} + +#' Print method for AGGTEobj_edid objects +#' +#' Prints aggregated treatment effects in a format similar to +#' \code{print.AGGTEobj}. +#' +#' @param x an \code{AGGTEobj_edid} object +#' @param ... additional arguments (currently ignored) +#' +#' @return \code{x} invisibly +#' @export +print.AGGTEobj_edid <- function(x, ...) { + cat("\n") + cat("Call:\n") + print(x$call) + cat("\n") + + alp <- x$alp + pointwise_cval <- stats::qnorm(1 - alp / 2) + + # Overall ATT summary + ov_att <- x$overall.att + ov_se <- x$overall.se + if (!is.null(ov_att) && !is.na(ov_att)) { + ov_lo <- ov_att - pointwise_cval * ov_se + ov_hi <- ov_att + pointwise_cval * ov_se + ov_sig <- (ov_hi < 0) | (ov_lo > 0) + if (is.na(ov_sig)) ov_sig <- FALSE + ov_sig_text <- if (ov_sig) "*" else "" + + if (x$type == "dynamic") { + cat("Overall summary of ATT's based on event-study/dynamic aggregation: \n") + } else if (x$type == "group") { + cat("Overall summary of ATT's based on group/cohort aggregation: \n") + } else { + cat("Overall ATT: \n") + } + + out1 <- cbind.data.frame(round(ov_att, 4), round(ov_se, 4), + round(ov_lo, 4), round(ov_hi, 4), + ov_sig_text) + colnames(out1) <- c("ATT", " Std. Error", + paste0(" [ ", 100 * (1 - alp), "% "), + "Conf. Int.]", "") + print(out1, row.names = FALSE) + cat("\n\n") + } + + # Per-index table for dynamic / group + if (x$type %in% c("dynamic", "group")) { + if (x$type == "dynamic") { + c1name <- "Event time" + cat("Dynamic Effects:\n") + } else { + c1name <- "Group" + cat("Group Effects:\n") + } + + cband_text1 <- paste0("[", 100 * (1 - alp), "% Pointwise ") + + cband_lower <- x$att.egt - pointwise_cval * x$se.egt + cband_upper <- x$att.egt + pointwise_cval * x$se.egt + + sig <- (cband_upper < 0) | (cband_lower > 0) + sig[is.na(sig)] <- FALSE + sig_text <- ifelse(sig, "*", "") + + out2 <- cbind.data.frame(x$egt, x$att.egt, x$se.egt, cband_lower, cband_upper) + out2 <- round(out2, 4) + out2 <- cbind.data.frame(out2, sig_text) + colnames(out2) <- c(c1name, "Estimate", "Std. Error", + cband_text1, "Conf. Band]", "") + print(out2, row.names = FALSE, justify = "centre") + } + + cat("---\n") + cat("Signif. codes: `*' confidence band does not cover 0") + cat("\n\n") + cat("Estimation Method: Efficient DiD (Chen, Sant'Anna & Xie 2025)\n") + + invisible(x) +} + +#' Summary method for AGGTEobj_edid objects +#' +#' Delegates to \code{print.AGGTEobj_edid}. +#' +#' @param object an \code{AGGTEobj_edid} object +#' @param ... additional arguments (currently ignored) +#' +#' @return \code{object} invisibly +#' @export +summary.AGGTEobj_edid <- function(object, ...) { + print.AGGTEobj_edid(object, ...) + invisible(object) +} diff --git a/R/edid-bootstrap.R b/R/edid-bootstrap.R new file mode 100644 index 0000000..7e27011 --- /dev/null +++ b/R/edid-bootstrap.R @@ -0,0 +1,225 @@ +# edid-bootstrap.R +# Multiplier bootstrap for the EDiD estimator. + +#' Run the multiplier bootstrap for EDiD estimates +#' +#' Generates \code{n_bootstrap} perturbed versions of all cell-level ATTs +#' by multiplying stored EIF vectors with multiplier weights, then re-aggregates +#' using the same fixed cohort-share weights. +#' +#' @param cells list of \code{edid_cell_result} objects +#' @param eif_matrix n x n_cells numeric matrix of stored EIFs +#' @param cell_index data.frame with columns \code{group}, \code{time}, +#' \code{cell_id}, \code{is_pre} +#' @param panel_obj panel object from \code{prepare_edid_panel()} +#' @param n_bootstrap positive integer number of bootstrap draws +#' @param bootstrap_weights character: \code{"rademacher"}, \code{"mammen"}, +#' or \code{"webb"} +#' @param seed integer seed or NULL +#' @param aggregate character: which aggregations to return +#' @param balance_e integer or NULL +#' @param alpha significance level +#' +#' @return list with elements \code{overall_b}, \code{event_study_b}, +#' \code{group_b}, \code{n_bootstrap}, \code{weight_type}, \code{seed} +#' @keywords internal +run_multiplier_bootstrap_edid <- function( + cells, eif_matrix, cell_index, panel_obj, + n_bootstrap, bootstrap_weights = "rademacher", seed = NULL, + aggregate = "all", balance_e = NULL, alpha = 0.05 +) { + n <- panel_obj$n + + # Generate multiplier weights: n x n_bootstrap matrix + xi_mat <- generate_multiplier_weights_edid( + n = n, + n_bootstrap = n_bootstrap, + type = bootstrap_weights, + cluster_indices = panel_obj$cluster_indices, + seed = seed + ) + + # Post-treatment cell ids and their ATTs + post_ci <- cell_index[!cell_index$is_pre, , drop = FALSE] + post_ids <- post_ci$cell_id + valid_post <- vapply(post_ids, function(cid) { + !is.null(cells[[cid]]$att) && is.finite(cells[[cid]]$att) + }, logical(1L)) + post_ci_v <- post_ci[valid_post, , drop = FALSE] + post_ids_v <- post_ci_v$cell_id + + # Perturbed cell ATTs: ATT_b(g,t) = ATT_hat(g,t) + (1/n) * xi' * EIF + # Build perturbed ATT matrix: n_valid_post x n_bootstrap + att_hat_post <- vapply(post_ids_v, function(cid) cells[[cid]]$att, numeric(1L)) + eif_post <- eif_matrix[, post_ids_v, drop = FALSE] # n x n_valid_post + # perturbation: (1/n) * t(xi_mat) %*% eif_post -> n_bootstrap x n_valid_post + perturb_mat <- (1 / n) * t(xi_mat) %*% eif_post + att_boot_mat <- sweep(perturb_mat, 2, att_hat_post, `+`) + # att_boot_mat: n_bootstrap x n_valid_post + + # Cohort-share weights for overall + pi_g_k <- vapply(seq_along(post_ids_v), function(jj) { + panel_obj$cohort_fractions[[as.character(post_ci_v$group[jj])]] + }, numeric(1L)) + S_u <- sum(pi_g_k) + q_norm <- if (S_u > EDID_DENOM_EPS) pi_g_k / S_u else rep(1 / length(pi_g_k), length(pi_g_k)) + + # ----------------------------------------------------------------------- + # Overall bootstrap draws + # ----------------------------------------------------------------------- + overall_b <- NULL + if (aggregate %in% c("all", "overall")) { + overall_b <- drop(att_boot_mat %*% q_norm) # n_bootstrap vector + } + + # ----------------------------------------------------------------------- + # Event-study bootstrap draws + # ----------------------------------------------------------------------- + event_study_b <- NULL + if (aggregate %in% c("all", "event_study")) { + cell_index_v <- cell_index[cell_index$cell_id %in% post_ids_v, , drop = FALSE] + cell_index_v$e <- cell_index_v$time - cell_index_v$group + if (!is.null(balance_e)) { + cell_index_v <- cell_index_v[abs(cell_index_v$e) <= balance_e, , drop = FALSE] + } + unique_e <- sort(unique(cell_index_v$e)) + event_study_b <- vector("list", length(unique_e)) + names(event_study_b) <- as.character(unique_e) + + for (ii in seq_along(unique_e)) { + e_val <- unique_e[ii] + e_mask <- cell_index_v$e == e_val + e_ids <- cell_index_v$cell_id[e_mask] + e_g <- cell_index_v$group[e_mask] + # map e_ids to column indices in att_boot_mat + col_idx <- match(e_ids, post_ids_v) + col_idx <- col_idx[!is.na(col_idx)] + if (length(col_idx) == 0L) next + e_pi <- pi_g_k[col_idx] + e_Su <- sum(e_pi) + if (e_Su < EDID_DENOM_EPS) next + e_q <- e_pi / e_Su + event_study_b[[ii]] <- drop(att_boot_mat[, col_idx, drop = FALSE] %*% e_q) + } + } + + # ----------------------------------------------------------------------- + # Group bootstrap draws + # ----------------------------------------------------------------------- + group_b <- NULL + if (aggregate %in% c("all", "group")) { + tgroups <- panel_obj$treatment_groups + group_b <- vector("list", length(tgroups)) + names(group_b) <- as.character(tgroups) + + for (ii in seq_along(tgroups)) { + g_val <- tgroups[ii] + g_mask <- post_ci_v$group == g_val + g_ids <- post_ci_v$cell_id[g_mask] + col_idx <- match(g_ids, post_ids_v) + col_idx <- col_idx[!is.na(col_idx)] + if (length(col_idx) == 0L) next + m_g <- length(col_idx) + g_q <- rep(1 / m_g, m_g) + group_b[[ii]] <- drop(att_boot_mat[, col_idx, drop = FALSE] %*% g_q) + } + } + + list( + overall_b = overall_b, + event_study_b = event_study_b, + group_b = group_b, + n_bootstrap = n_bootstrap, + weight_type = bootstrap_weights, + seed = seed + ) +} + +#' Generate multiplier bootstrap weights +#' +#' Returns an \code{n x n_bootstrap} matrix of multiplier weights drawn from +#' the specified distribution. When \code{cluster_indices} is supplied, +#' weights are drawn at the cluster level (G x n_bootstrap) and then +#' expanded to unit level by repeating within cluster. +#' +#' @param n integer: number of units +#' @param n_bootstrap positive integer: number of bootstrap draws +#' @param type character: \code{"rademacher"} (default), \code{"mammen"}, +#' or \code{"webb"} +#' @param cluster_indices integer vector length n (values 1..G) or NULL +#' @param seed integer seed or NULL +#' +#' @return numeric matrix n x n_bootstrap +#' @keywords internal +generate_multiplier_weights_edid <- function( + n, n_bootstrap, type = "rademacher", cluster_indices = NULL, seed = NULL +) { + if (!is.null(seed)) set.seed(seed) + + # Determine draw size + if (!is.null(cluster_indices)) { + G <- length(unique(cluster_indices)) + draw_n <- G + } else { + G <- NULL + draw_n <- n + } + + xi_raw <- switch(type, + rademacher = { + matrix(sample(c(-1, 1), draw_n * n_bootstrap, replace = TRUE), + nrow = draw_n, ncol = n_bootstrap) + }, + mammen = { + p_pos <- (sqrt(5) + 1) / (2 * sqrt(5)) + p_neg <- 1 - p_pos + v_pos <- (sqrt(5) + 1) / 2 + v_neg <- -(sqrt(5) - 1) / 2 + raw <- matrix( + sample(c(v_neg, v_pos), draw_n * n_bootstrap, replace = TRUE, + prob = c(p_neg, p_pos)), + nrow = draw_n, ncol = n_bootstrap + ) + raw + }, + webb = { + webb_vals <- c(-sqrt(3/2), -1, -sqrt(1/2), sqrt(1/2), 1, sqrt(3/2)) + matrix( + sample(webb_vals, draw_n * n_bootstrap, replace = TRUE), + nrow = draw_n, ncol = n_bootstrap + ) + }, + stop(sprintf("Unknown bootstrap_weights type: \"%s\". ", + type), + "Choose one of \"rademacher\", \"mammen\", or \"webb\".") + ) + + # Expand cluster-level draws to unit level + if (!is.null(cluster_indices)) { + xi_unit <- xi_raw[cluster_indices, , drop = FALSE] + return(xi_unit) + } + xi_raw +} + +#' Compute bootstrap SE, CI, and p-value from a vector of bootstrap draws +#' +#' @param boot_draws numeric vector of length \code{n_bootstrap} +#' @param att_hat scalar point estimate +#' @param alpha significance level in (0, 1) +#' +#' @return named list: \code{se_boot}, \code{ci_lower}, \code{ci_upper}, +#' \code{p_value_boot} +#' @keywords internal +compute_bootstrap_stats_edid <- function(boot_draws, att_hat, alpha = 0.05) { + se_boot <- stats::sd(boot_draws - att_hat) + ci_lower <- unname(stats::quantile(boot_draws, alpha / 2)) + ci_upper <- unname(stats::quantile(boot_draws, 1 - alpha / 2)) + p_value <- mean(abs(boot_draws - att_hat) >= abs(att_hat)) + list( + se_boot = se_boot, + ci_lower = ci_lower, + ci_upper = ci_upper, + p_value_boot = p_value + ) +} diff --git a/R/edid-cov-eif.R b/R/edid-cov-eif.R new file mode 100644 index 0000000..0c631e8 --- /dev/null +++ b/R/edid-cov-eif.R @@ -0,0 +1,451 @@ +# edid-cov-eif.R +# Generated outcome and EIF computation for the EDiD covariate path. +# Implements Chen, Sant'Anna & Xie (2025) Eq. (3.9), (3.10), (3.12), (4.4). + +# --------------------------------------------------------------------------- +# Generated outcomes (doubly-robust, n x H matrix) +# --------------------------------------------------------------------------- + +#' Compute doubly-robust generated outcomes for a (g, t) cell +#' +#' Returns the n x H matrix of generated outcomes where column j corresponds +#' to pair j = \eqn{(g'_j, t_{pre,j})} and row i to unit i. Implements +#' Eq. (4.4) of Chen, Sant'Anna & Xie (2025). +#' +#' For self-comparison pairs (gp == g), the formula reduces to Eq. (3.2): +#' \deqn{\tilde{Y} = (G_g/\pi_g - r_{g,\infty} G_\infty/\pi_g)(Y_t - Y_{tpre} - m_{\infty,t,tpre})} +#' +#' For cross-cohort pairs (gp != g), the three-term doubly-robust formula applies: +#' \deqn{\tilde{Y} = (G_g/\pi_g)(Y_t - Y_1 - m_{\infty,t,1}) +#' - r_{g,\infty} (G_\infty/\pi_g)(Y_t - Y_{tpre} - m_{\infty,t,tpre}) +#' - r_{g,g'} (G_{g'}/\pi_g)(Y_{tpre} - Y_1 - m_{g',tpre,1})} +#' Note: term1 uses only \eqn{m_{\infty,t,1}}, not \eqn{m_{g',tpre,1}}. Adding +#' \eqn{m_{g',tpre,1}} to term1 would bias the estimator by +#' \eqn{E[Y_{tpre}-Y_1|G=g]}, since the propensity-ratio correction in term3 +#' accounts for G=g' units only, not G=g units. +#' +#' @param panel_obj panel object from \code{prepare_edid_panel()} +#' @param g scalar: treatment cohort +#' @param t scalar: target time period +#' @param pairs data.frame with columns \code{gp} and \code{tpre}; H rows +#' @param prop_ratios named list of n-vectors keyed by \code{as.character(gp)}: +#' cross-fitted propensity ratios. Must include key \code{"Inf"} for +#' \eqn{r_{g,\infty}} and keys for each cross-cohort gp. +#' @param cond_means named list of n-vectors keyed by +#' \code{paste0(gp, "_", period)}: cross-fitted conditional means +#' \eqn{E[Y_{period} - Y_1 | G=gp, X]}. Must include never-treated keys. +#' @param pt_assumption \code{"all"} or \code{"post"} +#' +#' @return numeric matrix n x H; entries may be NA if nuisances are NA +#' @keywords internal +compute_generated_outcomes_cov_edid <- function( + panel_obj, + g, + t, + pairs, + prop_ratios, + cond_means, + pt_assumption +) { + H <- nrow(pairs) + n <- panel_obj$n + ow <- panel_obj$outcome_wide + + mask_g <- panel_obj$cohort_masks[[as.character(g)]] + pi_g <- panel_obj$cohort_fractions[[as.character(g)]] + Ig <- as.numeric(mask_g) + + col_t <- panel_obj$period_to_col[[as.character(t)]] + col_1 <- panel_obj$period_to_col[[as.character(panel_obj$period_1)]] + + # Never-treated indicator (used in all pairs) + I_inf <- as.numeric(panel_obj$never_treated_mask) + + gen_out_mat <- matrix(NA_real_, nrow = n, ncol = H) + + for (j in seq_len(H)) { + gp_j <- pairs$gp[j] + tpre_j <- pairs$tpre[j] + col_tp <- panel_obj$period_to_col[[as.character(tpre_j)]] + + # Determine if this is a self-comparison pair + is_self <- is.finite(gp_j) && gp_j == g + + if (is_self) { + # ----------------------------------------------------------------- + # Self-comparison pair (gp == g): uses never-treated as comparison + # Eq. (3.2): phi = (G_g/pi_g - r[g,Inf]*G_Inf/pi_g) * + # (Y_t - Y_tpre - m_{Inf,t,tpre}(X)) + # ----------------------------------------------------------------- + r_inf <- prop_ratios[["Inf"]] + # m_{Inf,t,tpre}(X) = E[Y_t - Y_tpre | G=Inf, X] + # = E[Y_t - Y_1 | G=Inf, X] - E[Y_tpre - Y_1 | G=Inf, X] + m_inf_t <- cond_means[[paste0("Inf_", t)]] + m_inf_tp <- cond_means[[paste0("Inf_", tpre_j)]] + + if (is.null(r_inf) || is.null(m_inf_t) || is.null(m_inf_tp)) { + warning(sprintf( + "compute_generated_outcomes_cov_edid: missing nuisance for self-pair (gp=%g, tpre=%g).", + gp_j, tpre_j + )) + next + } + + m_inf_diff <- m_inf_t - m_inf_tp # m_{Inf,t,tpre}(X) + y_diff <- ow[, col_t] - ow[, col_tp] # Y_t - Y_tpre + + phi_j <- (Ig / pi_g - r_inf * I_inf / pi_g) * (y_diff - m_inf_diff) + + } else { + # ----------------------------------------------------------------- + # Cross-cohort pair (gp != g): full three-term Eq. (4.4) + # phi = (G_g/pi_g) * (Y_t - Y_1 - m_{Inf,t,1}(X) - m_{g',tpre,1}(X)) + # - r[g,Inf] * (G_Inf/pi_g) * (Y_t - Y_tpre - m_{Inf,t,tpre}(X)) + # - r[g,g'] * (G_g'/pi_g) * (Y_tpre - Y_1 - m_{g',tpre,1}(X)) + # ----------------------------------------------------------------- + gp_key <- as.character(gp_j) + + # Propensity ratios + r_inf <- prop_ratios[["Inf"]] + r_gp <- prop_ratios[[gp_key]] + + # Conditional means + m_inf_t <- cond_means[[paste0("Inf_", t)]] + m_inf_tp <- cond_means[[paste0("Inf_", tpre_j)]] + m_gp_tp <- cond_means[[paste0(gp_key, "_", tpre_j)]] + + if (is.null(r_inf) || is.null(r_gp) || + is.null(m_inf_t) || is.null(m_inf_tp) || is.null(m_gp_tp)) { + warning(sprintf( + "compute_generated_outcomes_cov_edid: missing nuisance for cross-pair (gp=%g, tpre=%g).", + gp_j, tpre_j + )) + next + } + + # Comparison cohort indicator + if (is.infinite(gp_j)) { + I_gp <- I_inf + } else { + mask_gp <- panel_obj$cohort_masks[[gp_key]] + if (is.null(mask_gp)) { + warning(sprintf("compute_generated_outcomes_cov_edid: no mask for gp=%g", gp_j)) + next + } + I_gp <- as.numeric(mask_gp) + } + + # m_{Inf,t,tpre}(X) = m_{Inf,t,1}(X) - m_{Inf,tpre,1}(X) + m_inf_diff <- m_inf_t - m_inf_tp + + # Outcome differences + Y_t <- ow[, col_t] + Y_1 <- ow[, col_1] + Y_tpre <- ow[, col_tp] + + # Three-term doubly-robust formula. + # + # The identification formula is: + # ATT(g,t) = E[Y_t-Y_1|G=g] - E[Y_t-Y_tpre|G=Inf] - E[Y_tpre-Y_1|G=g'] + # + # The AIPW augmentation replaces each group-mean term (w*Y) with + # (w*(Y - m(X))) + m(X), so the residuals have zero conditional mean under + # a correctly specified outcome model and consistency holds under correct + # propensity. For the treated group (term1), the relevant conditional mean + # is m_{Inf,t,1}(X) = E[Y_t^0-Y_1^0|X] (counterfactual, by PT). Adding + # m_{g',tpre,1}(X) to term1's subtraction would create a bias of + # E[Y_tpre-Y_1|G=g] (≈ tpre-1 for linear trend DGPs) because the + # propensity-ratio correction in term3 only cancels the G=g' contribution, + # not the G=g contribution. Hence term1 uses only m_inf_t. + term1 <- (Ig / pi_g) * (Y_t - Y_1 - m_inf_t) + term2 <- r_inf * (I_inf / pi_g) * (Y_t - Y_tpre - m_inf_diff) + term3 <- r_gp * (I_gp / pi_g) * (Y_tpre - Y_1 - m_gp_tp) + + phi_j <- term1 - term2 - term3 + } + + gen_out_mat[, j] <- phi_j + } + + gen_out_mat +} + +# --------------------------------------------------------------------------- +# Conditional Omega* (H x H) via Nadaraya-Watson kernel +# --------------------------------------------------------------------------- + +#' Compute the averaged conditional covariance matrix Omega*(X) +#' +#' Estimates \eqn{\Omega^* = n^{-1} \sum_i \hat\Omega^*(X_i)} using a faithful +#' plug-in of Eq. (3.12) from Chen, Sant'Anna & Xie (2025). +#' +#' Each (j,k)-th element of Omega*(X) is estimated using Nadaraya-Watson +#' kernel smoothing of outcome change covariances within specific cohorts, +#' scaled by propensity scores. +#' +#' \strong{Computational complexity}: O(n^2 * H^2). Emits a warning for +#' n > 1000. +#' +#' @param panel_obj panel object (needs \code{covariate_matrix}, \code{outcome_wide}, +#' \code{cohort_masks}, \code{never_treated_mask}) +#' @param g scalar: target treatment cohort +#' @param t scalar: target time period +#' @param pairs data.frame with columns \code{gp} and \code{tpre}; H rows +#' @param prop_ratios named list of n-vectors: cross-fitted propensity ratios +#' @param cond_means named list of n-vectors: cross-fitted conditional means +#' @param bw numeric vector length d or NULL (auto from \code{bw.nrd0}) +#' +#' @return numeric matrix H x H (positive semi-definite) +#' @keywords internal +compute_omega_star_cov_edid <- function(panel_obj, g, t, pairs, + prop_ratios, cond_means, + bw = NULL) { + X_mat <- panel_obj$covariate_matrix + n <- nrow(X_mat) + d <- ncol(X_mat) + H <- nrow(pairs) + ow <- panel_obj$outcome_wide + + if (n > 1000L) { + warning(sprintf( + "compute_omega_star_cov_edid: n=%d > 1000; O(n^2) kernel loop may be slow.", n + )) + } + + # ----------------------------------------------------------------------- + # Step 1: Bandwidths + # ----------------------------------------------------------------------- + if (is.null(bw)) { + bw <- numeric(d) + for (k in seq_len(d)) { + h_k <- tryCatch(stats::bw.nrd0(X_mat[, k]), error = function(e) 0) + if (!is.finite(h_k) || h_k < .Machine$double.eps) { + warning(sprintf( + "compute_omega_star_cov_edid: bandwidth for covariate %d is 0 or NA; using h=1.", k + )) + h_k <- 1 + } + bw[k] <- h_k + } + } + + # ----------------------------------------------------------------------- + # Step 2: Build kernel weight matrix K_mat[i, ell] (n x n) + # ----------------------------------------------------------------------- + K_mat <- matrix(1, nrow = n, ncol = n) + for (k in seq_len(d)) { + diff_k <- outer(X_mat[, k], X_mat[, k], "-") + K_mat <- K_mat * stats::dnorm(diff_k / bw[k]) / bw[k] + } + + # ----------------------------------------------------------------------- + # Step 3: Precompute outcome change residuals for each cohort + # For Eq. (3.12) we need: + # Cov(Y_t - Y_1, Y_t - Y_1 | G=g, X) [treated group] + # Cov(Y_t - Y_{t'_j}, Y_t - Y_{t'_k} | G=Inf, X) [never-treated] + # Cov(Y_t - Y_1, Y_{t'_j} - Y_1 | G=g, X) [cross-term, self-pairs] + # Cov(Y_{t'_j} - Y_1, Y_{t'_k} - Y_1 | G=g'_j, X) [cross-cohort] + # ----------------------------------------------------------------------- + col_t <- panel_obj$period_to_col[[as.character(t)]] + col_1 <- panel_obj$period_to_col[[as.character(panel_obj$period_1)]] + + mask_g <- panel_obj$cohort_masks[[as.character(g)]] + mask_inf <- panel_obj$never_treated_mask + + # ----------------------------------------------------------------------- + # Approximation note (Omega* scaling terms): + # Paper Eq. (3.12) uses conditional propensity scores 1/p_g(X) and + # 1/p_inf(X) as scalar pre-factors on each conditional covariance term. + # Here we approximate them by their unconditional counterparts 1/pi_g and + # 1/pi_inf, where pi_g = n_g/n. + # + # Justification: under a correctly specified propensity model, the + # conditional inverse propensity 1/p_g(X) enters Omega*(X) only as a + # scaling of a conditional covariance term. Replacing it by its + # unconditional analogue introduces a bias in Omega* of order + # O(Var(1/p_g(X))) relative to its true value. In moderate-overlap + # settings this bias is small. For severe overlap failures the + # approximation can be materially wrong; a warning is issued when extreme + # propensity ratios are detected in the nuisance estimation step. + # + # A fully faithful plug-in would require estimating 1/p_g(X) by + # 1/(pi_g * r_{g,inf}(X) / E[r_{g,inf}(X)]) or similar. That is a + # planned enhancement; for the current release this approximation is + # retained because it still provides consistent Omega* when p_g(X) is + # sufficiently homogeneous across X. + # ----------------------------------------------------------------------- + pi_g <- panel_obj$cohort_fractions[[as.character(g)]] + pi_inf <- sum(mask_inf) / n + + # Unconditional propensity approximation + inv_pg_unconditional <- 1 / pi_g + inv_pinf_unconditional <- 1 / pi_inf + + # Helper: kernel-smoothed conditional covariance of (A, B) given G=group at point x_i + # Returns an n-vector (one value per evaluation point) + kernel_cond_cov <- function(A, B, group_mask) { + # For each evaluation point i, compute: + # sum_{ell in group} K(x_i, x_ell) * (A_ell - mu_A(x_i)) * (B_ell - mu_B(x_i)) + # / sum_{ell in group} K(x_i, x_ell) + # Where mu_A(x_i) = sum_{ell in group} K * A_ell / sum K + idx <- which(group_mask) + if (length(idx) < 2L) return(rep(0, n)) + + K_group <- K_mat[, idx, drop = FALSE] # n x n_group + K_sums <- rowSums(K_group) + K_sums[K_sums < 1e-15] <- NA_real_ + + A_group <- A[idx] + B_group <- B[idx] + + # Kernel-weighted means + mu_A <- drop(K_group %*% A_group) / K_sums + mu_B <- drop(K_group %*% B_group) / K_sums + + # Kernel-weighted covariance + # sum_ell K[i,ell] * (A_ell - mu_A_i) * (B_ell - mu_B_i) / sum_ell K[i,ell] + resid_A <- sweep(matrix(A_group, nrow = n, ncol = length(idx), byrow = TRUE), + 1, mu_A, "-") + resid_B <- sweep(matrix(B_group, nrow = n, ncol = length(idx), byrow = TRUE), + 1, mu_B, "-") + cov_vals <- rowSums(K_group * resid_A * resid_B) / K_sums + cov_vals[is.na(cov_vals)] <- 0 + cov_vals + } + + # ----------------------------------------------------------------------- + # Step 4: Build Omega* by computing each (j,k) element via Eq. (3.12) + # then averaging over units + # ----------------------------------------------------------------------- + Omega_hat <- matrix(0, nrow = H, ncol = H) + + # Precompute outcome changes we'll need repeatedly + Y_t_minus_Y1 <- ow[, col_t] - ow[, col_1] + + for (j in seq_len(H)) { + gp_j <- pairs$gp[j] + tpre_j <- pairs$tpre[j] + col_tj <- panel_obj$period_to_col[[as.character(tpre_j)]] + + is_self_j <- is.finite(gp_j) && gp_j == g + + Y_t_minus_Ytj <- ow[, col_t] - ow[, col_tj] + Y_tj_minus_Y1 <- ow[, col_tj] - ow[, col_1] + + for (k in j:H) { + gp_k <- pairs$gp[k] + tpre_k <- pairs$tpre[k] + col_tk <- panel_obj$period_to_col[[as.character(tpre_k)]] + + is_self_k <- is.finite(gp_k) && gp_k == g + + Y_t_minus_Ytk <- ow[, col_t] - ow[, col_tk] + Y_tk_minus_Y1 <- ow[, col_tk] - ow[, col_1] + + # Eq. (3.12) term by term: + # Term 1: (1/p_g(X)) * Cov(Y_t - Y_1, Y_t - Y_1 | G=g, X) + term1 <- inv_pg_unconditional * + kernel_cond_cov(Y_t_minus_Y1, Y_t_minus_Y1, mask_g) + + # Term 2: (1/p_inf(X)) * Cov(Y_t - Y_{t'_j}, Y_t - Y_{t'_k} | G=Inf, X) + term2 <- inv_pinf_unconditional * + kernel_cond_cov(Y_t_minus_Ytj, Y_t_minus_Ytk, mask_inf) + + # Term 3: -1{g == g'_j}/p_g(X) * Cov(Y_t - Y_1, Y_{t'_j} - Y_1 | G=g, X) + term3 <- 0 + if (is_self_j) { + term3 <- -inv_pg_unconditional * + kernel_cond_cov(Y_t_minus_Y1, Y_tj_minus_Y1, mask_g) + } + + # Term 4: -1{g == g'_k}/p_g(X) * Cov(Y_t - Y_1, Y_{t'_k} - Y_1 | G=g, X) + term4 <- 0 + if (is_self_k) { + term4 <- -inv_pg_unconditional * + kernel_cond_cov(Y_t_minus_Y1, Y_tk_minus_Y1, mask_g) + } + + # Term 5: 1{g'_j == g'_k}/p_{g'_j}(X) * Cov(Y_{t'_j}-Y_1, Y_{t'_k}-Y_1 | G=g'_j, X) + term5 <- 0 + gp_j_eff <- if (is_self_j) Inf else gp_j + gp_k_eff <- if (is_self_k) Inf else gp_k + if (identical(gp_j_eff, gp_k_eff)) { + if (is.infinite(gp_j_eff)) { + # Both use never-treated — already captured in term2's structure + # Actually no: term5 uses Cov(Y_{t'_j}-Y_1, Y_{t'_k}-Y_1 | G=Inf, X) + # which is different from term2's Cov(Y_t-Y_{t'_j}, Y_t-Y_{t'_k} | G=Inf, X) + # For self-pairs where g'_j = g'_k = g (mapped to Inf), the "group" is Inf + inv_pgp <- inv_pinf_unconditional + mask_gp_jk <- mask_inf + } else { + pi_gp <- panel_obj$cohort_fractions[[as.character(gp_j)]] + if (is.null(pi_gp) || pi_gp < 1e-15) { + inv_pgp <- 0 + mask_gp_jk <- rep(FALSE, n) + } else { + inv_pgp <- 1 / pi_gp + mask_gp_jk <- panel_obj$cohort_masks[[as.character(gp_j)]] + } + } + term5 <- inv_pgp * + kernel_cond_cov(Y_tj_minus_Y1, Y_tk_minus_Y1, mask_gp_jk) + } + + # Average Omega*[j,k](X) over all units + omega_jk <- mean(term1 + term2 + term3 + term4 + term5) + + Omega_hat[j, k] <- omega_jk + if (k != j) Omega_hat[k, j] <- omega_jk + } + } + + # Ensure positive semi-definiteness via eigenvalue floor + eig <- eigen(Omega_hat, symmetric = TRUE) + eig$values <- pmax(eig$values, 1e-12) + Omega_hat <- eig$vectors %*% diag(eig$values, nrow = H) %*% t(eig$vectors) + + Omega_hat +} + +# --------------------------------------------------------------------------- +# EIF with covariate adjustment +# --------------------------------------------------------------------------- + +#' Compute the efficient influence function for a cell with covariates +#' +#' The efficient GMM estimator is +#' \deqn{\hat\beta_{g,t} = \sum_j w_j \cdot \frac{1}{n}\sum_i \tilde{Y}_{j,i}} +#' where \eqn{\tilde{Y}_{j,i}} is the doubly-robust generated outcome for pair j +#' and \eqn{w_j} are the fixed efficient weights. By the delta method, its +#' influence function is +#' \deqn{EIF_i = \sum_j w_j \cdot (\tilde{Y}_{j,i} - \beta_j) +#' = \left(\sum_j w_j \tilde{Y}_{j,i}\right) - ATT(g,t)} +#' (using \eqn{\sum_j w_j \beta_j = ATT(g,t)}). +#' +#' Statistical note: an alternative form \eqn{EIF_i = \sum_j w_j \tilde{Y}_{j,i} +#' + (G_{g,i}/\pi_g) \cdot ATT(g,t)} that appears in some semiparametric +#' efficiency calculations adds a term whose mean is \eqn{ATT(g,t)} (since +#' \eqn{E[G_{g,i}/\pi_g] = 1}). After centring, this equals +#' \eqn{(correct\,EIF) + (G_{g,i}/\pi_g - 1) \cdot ATT(g,t)}, inflating the +#' variance by \eqn{ATT^2 \cdot Var(G_{g,i}/\pi_g - 1) > 0} whenever +#' \eqn{ATT \ne 0}. The correct expression for the SE formula +#' \eqn{SE = \sqrt{\sum_i EIF_i^2 / n^2}} is the one below. +#' +#' @param panel_obj panel object (needs n) +#' @param gen_out_mat numeric matrix n x H (generated outcomes) +#' @param weights numeric vector length H summing to 1 +#' @param att_gt scalar point estimate (= sum_j w_j * colMeans(gen_out_mat)) +#' @param g scalar: target treatment cohort (unused; kept for API compatibility) +#' +#' @return numeric vector length n, mean approximately 0 +#' @keywords internal +compute_eif_cov_edid <- function(panel_obj, gen_out_mat, weights, att_gt, g) { + # EIF_i = (sum_j w_j * phi_{j,i}) - ATT(g,t) + # This has theoretical mean 0 (E[phi_j] = beta_j, sum_j w_j*beta_j = ATT). + eif <- drop(gen_out_mat %*% weights) - att_gt + + # Numerical centering: removes any floating-point residual from finite-sample + # estimation of nuisances. Does not change the variance formula. + eif <- eif - mean(eif) + eif +} diff --git a/R/edid-cov.R b/R/edid-cov.R new file mode 100644 index 0000000..32197cb --- /dev/null +++ b/R/edid-cov.R @@ -0,0 +1,392 @@ +# edid-cov.R +# Nuisance estimation functions for the EDiD covariate path. +# Implements sieve (B-spline) estimation of propensity ratios and conditional +# means with K-fold cross-fitting. + +# --------------------------------------------------------------------------- +# Fold assignment +# --------------------------------------------------------------------------- + +#' Generate cross-fitting fold assignments +#' +#' Assigns each of \code{n} units to one of \code{K} folds via simple +#' round-robin ordering (after optional random shuffling). +#' +#' @param n positive integer: number of units +#' @param K positive integer: number of folds (default 5) +#' @param seed integer or NULL: if not NULL, set.seed() is called and restored +#' +#' @return integer vector length \code{n}, values in \code{1:K} +#' @keywords internal +build_crossfit_folds_edid <- function(n, K = 5L, seed = NULL) { + if (!is.null(seed)) { + old_seed <- if (exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) { + get(".Random.seed", envir = .GlobalEnv) + } else { + NULL + } + on.exit({ + if (is.null(old_seed)) { + if (exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) + rm(".Random.seed", envir = .GlobalEnv) + } else { + assign(".Random.seed", old_seed, envir = .GlobalEnv) + } + }, add = TRUE) + set.seed(seed) + } + sample(seq_len(K), n, replace = TRUE) +} + +# --------------------------------------------------------------------------- +# B-spline basis construction and prediction +# --------------------------------------------------------------------------- + +#' Build B-spline basis matrix for a covariate matrix +#' +#' For the first covariate column, fits a B-spline basis with intercept +#' (\code{bs_df} columns). For each additional column, fits without intercept +#' (\code{bs_df - 1} columns, to avoid collinearity). Falls back to a linear +#' basis (intercept + raw column) if \code{splines::bs()} fails. +#' +#' @param X_mat numeric matrix, n x d. May also be a numeric vector (treated +#' as n x 1). +#' @param bs_df positive integer: degrees of freedom for the B-spline basis +#' (default 4) +#' +#' @return numeric matrix n x p, with attribute \code{"bs_objects"}: a list of +#' length d, each element the fitted \code{bs} object for that column (used +#' by \code{predict_basis_edid()} to evaluate on new data). +#' @keywords internal +build_basis_matrix_edid <- function(X_mat, bs_df = 4L) { + if (is.vector(X_mat)) X_mat <- matrix(X_mat, ncol = 1L) + n <- nrow(X_mat) + d <- ncol(X_mat) + + blocks <- vector("list", d) + bs_objects <- vector("list", d) + + for (k in seq_len(d)) { + xk <- X_mat[, k] + use_intercept <- (k == 1L) + # All warnings from splines::bs() are benign in cross-fitting contexts: + # "boundary knots" fires when test covariates are outside the training range; + # "interior knots match" fires for binary/factor-derived dummy columns. + # Errors are caught and fall back to a linear basis. + bs_result <- tryCatch( + suppressWarnings(splines::bs(xk, df = bs_df, intercept = use_intercept)), + error = function(e) NULL + ) + if (!is.null(bs_result)) { + bs_objects[[k]] <- bs_result + blocks[[k]] <- as.matrix(bs_result) + } else { + warning(sprintf("B-spline basis failed for covariate column %d; using linear basis.", k)) + bs_objects[[k]] <- list(fallback = TRUE, use_intercept = use_intercept) + blocks[[k]] <- if (use_intercept) cbind(1, xk) else matrix(xk, ncol = 1L) + } + } + + B <- do.call(cbind, blocks) + attr(B, "bs_objects") <- bs_objects + B +} + +#' Predict B-spline basis at new data using stored knot information +#' +#' Evaluates the basis used during training (stored as \code{bs} objects) at +#' new covariate values. When the training basis fell back to a linear basis, +#' returns the linear approximation. +#' +#' @param bs_obj_list list of length d: the \code{"bs_objects"} attribute from +#' \code{build_basis_matrix_edid()} +#' @param X_new_mat numeric matrix, n_test x d +#' +#' @return numeric matrix n_test x p (same column count as training basis) +#' @keywords internal +predict_basis_edid <- function(bs_obj_list, X_new_mat) { + if (is.vector(X_new_mat)) X_new_mat <- matrix(X_new_mat, ncol = 1L) + d <- length(bs_obj_list) + blocks <- vector("list", d) + + for (k in seq_len(d)) { + xk <- X_new_mat[, k] + bsk <- bs_obj_list[[k]] + # NULL or fallback sentinel -> linear basis + is_fallback <- is.null(bsk) || (is.list(bsk) && isTRUE(bsk$fallback)) + if (is_fallback) { + use_intercept <- if (is.list(bsk) && !is.null(bsk$use_intercept)) bsk$use_intercept else (k == 1L) + blocks[[k]] <- if (use_intercept) cbind(1, xk) else matrix(xk, ncol = 1L) + } else { + # Suppress the splines::bs() "beyond boundary knots" warning that fires + # whenever a cross-fitting test fold contains covariate values outside the + # training fold's knot range. This is a normal artifact of random splits + # and does not affect prediction correctness. All other warnings propagate. + blocks[[k]] <- withCallingHandlers( + predict(bsk, newx = xk), + warning = function(w) { + if (grepl("boundary knots", conditionMessage(w), fixed = TRUE)) + invokeRestart("muffleWarning") + } + ) + } + } + + do.call(cbind, blocks) +} + +# --------------------------------------------------------------------------- +# Propensity ratio estimation +# --------------------------------------------------------------------------- + +#' Estimate the propensity ratio r(X) = P(G=g|X) / P(G=g'|X) +#' +#' Implements the sieve (B-spline) estimator for the propensity ratio from +#' Chen, Sant'Anna & Xie (2025) Eq. (4.1)-(4.2). The ratio is estimated via +#' OLS minimising \eqn{E[r(X)^2 G_{g'} - 2 r(X) G_g]}. +#' +#' Closed form: +#' \deqn{\hat\beta = [B_{g'}' B_{g'}]^{-1} \sum_{i: G_i = g} B(X_i)} +#' Then \eqn{\hat r(X_i) = B(X_i)' \hat\beta}, clipped to [0, Inf). +#' +#' @param X_train numeric matrix n_train x d +#' @param G_train numeric vector n_train: cohort values (Inf for never-treated) +#' @param X_test numeric matrix n_test x d +#' @param g scalar: target treatment cohort +#' @param gp scalar: comparison cohort (may be Inf for never-treated) +#' @param bs_df integer: B-spline degrees of freedom (default 4) +#' +#' @return numeric vector length n_test: estimated r(X) values, >= 0 +#' @keywords internal +estimate_propensity_ratio_edid <- function(X_train, G_train, X_test, g, gp, + bs_df = 4L) { + n_test <- nrow(X_test) + + # Masks for g and g' units in training data + mask_gp <- if (is.infinite(gp)) is.infinite(G_train) else (G_train == gp) + mask_g <- (G_train == g) + + n_gp <- sum(mask_gp) + n_g <- sum(mask_g) + + if (n_gp < 2L) { + warning(sprintf( + "estimate_propensity_ratio_edid: fewer than 2 units in g'=%g training fold; returning 0.", gp + )) + return(rep(0, n_test)) + } + if (n_g < 1L) { + warning(sprintf( + "estimate_propensity_ratio_edid: 0 units in g=%g training fold; returning 0.", g + )) + return(rep(0, n_test)) + } + + # Build basis on training data + B_train_obj <- build_basis_matrix_edid(X_train, bs_df) + B_train <- unclass(B_train_obj) + attr(B_train, "bs_objects") <- NULL + + B_gp <- B_train[mask_gp, , drop = FALSE] + col_sums_g <- colSums(B_train[mask_g, , drop = FALSE]) + + # beta_hat = [B_gp' B_gp]^{-1} * col_sums_g + BtB_gp <- t(B_gp) %*% B_gp + beta_hat <- as.vector(compute_pseudoinverse_edid(BtB_gp) %*% col_sums_g) + + # Predict on test data + B_test <- predict_basis_edid(attr(B_train_obj, "bs_objects"), X_test) + r_hat <- pmax(drop(B_test %*% beta_hat), 0) + + r_hat +} + +#' Check for extreme propensity ratios and warn once +#' @keywords internal +.check_extreme_ratios_edid <- function(r_vec, g, gp) { + if (any(is.finite(r_vec)) && max(r_vec, na.rm = TRUE) > 100) { + warning(sprintf( + "Extreme propensity ratios detected for g=%g, g'=%g (max > 100). Results may be unstable.", + g, gp + )) + } +} + +#' Estimate the conditional mean \eqn{E[Y_s - Y_1 | G=g', X]} +#' +#' Fits an OLS B-spline regression of \code{Y_delta} on \code{B(X)} using only +#' units with \code{G_train == gp}, then predicts for all test units. +#' +#' @param X_train numeric matrix n_train x d +#' @param Y_delta_train numeric vector n_train: Y_s - Y_1 for all training units +#' @param G_train numeric vector n_train: cohort values (Inf for never-treated) +#' @param X_test numeric matrix n_test x d +#' @param gp scalar: cohort to regress on (may be Inf) +#' @param bs_df integer: B-spline degrees of freedom (default 4) +#' +#' @return numeric vector length n_test +#' @keywords internal +estimate_conditional_mean_edid <- function(X_train, Y_delta_train, G_train, + X_test, gp, bs_df = 4L) { + n_test <- nrow(X_test) + + mask_gp <- if (is.infinite(gp)) is.infinite(G_train) else (G_train == gp) + n_gp <- sum(mask_gp) + + if (n_gp < 2L) { + fallback_val <- if (n_gp == 1L) Y_delta_train[mask_gp] else 0 + warning(sprintf( + "estimate_conditional_mean_edid: fewer than 2 units in g'=%g training fold; using constant.", gp + )) + return(rep(fallback_val, n_test)) + } + + X_gp <- X_train[mask_gp, , drop = FALSE] + y_gp <- Y_delta_train[mask_gp] + + B_gp_train_obj <- build_basis_matrix_edid(X_gp, bs_df) + B_gp_train <- unclass(B_gp_train_obj) + attr(B_gp_train, "bs_objects") <- NULL + + # Check minimum sample for basis dimension + p_basis <- ncol(B_gp_train) + if (n_gp <= p_basis) { + # Fewer obs than basis columns: use simpler 1-column basis (intercept only) + fit <- list(coef = mean(y_gp)) + m_hat <- rep(mean(y_gp), n_test) + return(m_hat) + } + + fit <- solve_ols_edid(B_gp_train, y_gp) + B_test <- predict_basis_edid(attr(B_gp_train_obj, "bs_objects"), X_test) + m_hat <- drop(B_test %*% fit$coef) + + m_hat +} + +# --------------------------------------------------------------------------- +# Cross-fitted nuisance estimation (aggregate over folds) +# --------------------------------------------------------------------------- + +#' Estimate propensity ratios for all comparison cohorts via cross-fitting +#' +#' For each unique \code{gp} in \code{pairs}, performs K-fold cross-fitting to +#' produce a full-sample n-vector of \eqn{\hat r_{g, g'}(X_i)}. +#' +#' @param panel_obj panel object with \code{covariate_matrix} and +#' \code{unit_cohorts} +#' @param g scalar: target treatment cohort +#' @param pairs data.frame with column \code{gp} +#' @param bs_df integer: B-spline df +#' @param K_folds integer: number of cross-fitting folds +#' @param fold_id integer vector length n: pre-generated fold assignments +#' +#' @return named list of n-vectors, keyed by \code{as.character(gp)} +#' @keywords internal +estimate_all_propensity_ratios <- function(panel_obj, g, pairs, bs_df, + K_folds, fold_id) { + n <- panel_obj$n + X_mat <- panel_obj$covariate_matrix + G_vec <- panel_obj$unit_cohorts + result <- list() + + unique_gps <- unique(pairs$gp) + + for (gp in unique_gps) { + r_full <- numeric(n) + + for (ell in seq_len(K_folds)) { + test_idx <- which(fold_id == ell) + train_idx <- which(fold_id != ell) + if (length(test_idx) == 0L) next + + r_full[test_idx] <- estimate_propensity_ratio_edid( + X_train = X_mat[train_idx, , drop = FALSE], + G_train = G_vec[train_idx], + X_test = X_mat[test_idx, , drop = FALSE], + g = g, + gp = gp, + bs_df = bs_df + ) + } + + .check_extreme_ratios_edid(r_full, g, gp) + result[[as.character(gp)]] <- r_full + } + + result +} + +#' Estimate conditional means for all (g', period) combinations via cross-fitting +#' +#' For each unique (gp, period) pair needed by the cell, performs K-fold +#' cross-fitting to produce a full-sample n-vector of +#' \eqn{\hat m_{g', \text{period}, 1}(X_i)}. +#' +#' @param panel_obj panel object with \code{covariate_matrix}, \code{unit_cohorts}, +#' \code{outcome_wide}, and \code{period_to_col} +#' @param pairs data.frame with columns \code{gp} and \code{tpre} +#' @param t_val scalar: target time period for this cell +#' @param bs_df integer: B-spline df +#' @param K_folds integer: number of cross-fitting folds +#' @param fold_id integer vector length n: pre-generated fold assignments +#' +#' @return named list of n-vectors, keyed by \code{paste0(gp, "_", period)} +#' @keywords internal +estimate_all_conditional_means <- function(panel_obj, pairs, t_val, bs_df, + K_folds, fold_id) { + n <- panel_obj$n + X_mat <- panel_obj$covariate_matrix + G_vec <- panel_obj$unit_cohorts + ow <- panel_obj$outcome_wide + t1_col <- panel_obj$period_to_col[[as.character(panel_obj$period_1)]] + result <- list() + + # Collect unique (gp, period) combinations needed + # We need m_{gp, t_val, 1}(X) and m_{gp, tpre, 1}(X) for each pair + unique_gps <- unique(pairs$gp) + unique_tpre <- unique(pairs$tpre) + + # Build full list of (gp, period) to estimate + combos <- unique(rbind( + data.frame(gp = pairs$gp, period = t_val), + data.frame(gp = pairs$gp, period = pairs$tpre) + )) + + for (ii in seq_len(nrow(combos))) { + gp <- combos$gp[ii] + period <- combos$period[ii] + key <- paste0(gp, "_", period) + + if (!is.null(result[[key]])) next # already computed + + period_col <- panel_obj$period_to_col[[as.character(period)]] + if (is.null(period_col)) { + warning(sprintf("estimate_all_conditional_means: period %g not found in panel.", period)) + result[[key]] <- rep(NA_real_, n) + next + } + + Y_delta <- ow[, period_col] - ow[, t1_col] # Y_{period} - Y_1 + m_full <- numeric(n) + + for (ell in seq_len(K_folds)) { + test_idx <- which(fold_id == ell) + train_idx <- which(fold_id != ell) + if (length(test_idx) == 0L) next + + m_full[test_idx] <- estimate_conditional_mean_edid( + X_train = X_mat[train_idx, , drop = FALSE], + Y_delta_train = Y_delta[train_idx], + G_train = G_vec[train_idx], + X_test = X_mat[test_idx, , drop = FALSE], + gp = gp, + bs_df = bs_df + ) + } + + result[[key]] <- m_full + } + + result +} diff --git a/R/edid-covariates.R b/R/edid-covariates.R new file mode 100644 index 0000000..4736b20 --- /dev/null +++ b/R/edid-covariates.R @@ -0,0 +1,2 @@ +# edid-covariates.R +# This file is intentionally empty. Covariate estimation is in edid-cov.R and edid-cov-eif.R. diff --git a/R/edid-data.R b/R/edid-data.R new file mode 100644 index 0000000..4637da5 --- /dev/null +++ b/R/edid-data.R @@ -0,0 +1,219 @@ +# edid-data.R +# Panel preparation and cluster indexing for the EDiD estimator. + +#' Prepare the panel object used throughout edid estimation +#' +#' Reshapes the long-format input \code{data} into a wide outcome matrix and +#' builds all masks and maps needed by downstream functions. +#' +#' @param data data.frame (or data.table / tibble) already validated +#' @param yname character scalar: outcome column name +#' @param idname character scalar: unit id column name +#' @param tname character scalar: time column name +#' @param gname character scalar: first-treatment-period column name +#' @param covariates NULL (stub) +#' @param clustervars character scalar or NULL +#' @param control_group \code{"nevertreated"} or \code{"notyettreated"} +#' @param anticipation non-negative integer +#' +#' @return a \code{panel_obj} list; see spec Section 5.1 +#' @keywords internal +prepare_edid_panel <- function( + data, yname, idname, tname, gname, + xformla = NULL, covariates = NULL, clustervars = NULL, + control_group = "nevertreated", + anticipation = 0L +) { + + # ----------------------------------------------------------------------- + # 1. Coerce to data.table and sort + # ----------------------------------------------------------------------- + dt <- data.table::as.data.table(data) + data.table::setkeyv(dt, c(idname, tname)) + + # ----------------------------------------------------------------------- + # 2-3. Extract sorted unique ids and time periods + # ----------------------------------------------------------------------- + all_units <- sort(unique(dt[[idname]])) + time_periods <- sort(unique(dt[[tname]])) + n <- length(all_units) + T_periods <- length(time_periods) + + # ----------------------------------------------------------------------- + # 4. period_1 and period_to_col map + # ----------------------------------------------------------------------- + period_1 <- time_periods[1L] + period_to_col <- stats::setNames( + seq_along(time_periods), + as.character(time_periods) + ) + + # ----------------------------------------------------------------------- + # 5. Pivot to wide outcome matrix (n x T_periods) + # Rows correspond to all_units (sorted), columns to time_periods (sorted) + # ----------------------------------------------------------------------- + wide_dt <- data.table::dcast( + dt, + formula = stats::as.formula(paste(idname, "~ ", tname)), + value.var = yname + ) + # Ensure rows in same order as all_units + setattr <- function(x, nm, val) { attr(x, nm) <- val; x } + unit_order <- match(all_units, wide_dt[[idname]]) + wide_dt <- wide_dt[unit_order, ] + + # Drop the unit id column; keep only the T_periods outcome columns + # Column names after dcast are as.character(time_periods) + col_order <- as.character(time_periods) + outcome_wide <- as.matrix(wide_dt[, col_order, with = FALSE]) + rownames(outcome_wide) <- NULL + colnames(outcome_wide) <- col_order + + # ----------------------------------------------------------------------- + # 6. unit_cohorts: gname value per unit (Inf for never-treated) + # ----------------------------------------------------------------------- + # Extract one gname per unit using base R tapply (avoids data.table NSE) + ft_vals <- dt[[gname]] + unit_id_vals <- dt[[idname]] + # Get first value of gname per unit (treatment is constant within unit) + unit_ft_map <- tapply(ft_vals, unit_id_vals, function(x) x[1L]) + # Map to all_units order + unit_cohorts <- as.numeric(unit_ft_map[match(all_units, names(unit_ft_map))]) + + # ----------------------------------------------------------------------- + # 7. Handle notyettreated control group + # ----------------------------------------------------------------------- + if (control_group == "notyettreated") { + finite_cohorts <- unit_cohorts[is.finite(unit_cohorts)] + last_g <- max(finite_cohorts) + # Relabel last cohort as Inf (never-treated for estimation purposes) + unit_cohorts[unit_cohorts == last_g] <- Inf + # Trim time periods >= last_g + keep_times <- time_periods[time_periods < last_g] + keep_cols <- as.character(keep_times) + outcome_wide <- outcome_wide[, keep_cols, drop = FALSE] + time_periods <- keep_times + T_periods <- length(time_periods) + period_1 <- time_periods[1L] + period_to_col <- stats::setNames( + seq_along(time_periods), + as.character(time_periods) + ) + } + + # ----------------------------------------------------------------------- + # 8. treatment_groups: sorted unique finite cohort values + # ----------------------------------------------------------------------- + treatment_groups <- sort(unique(unit_cohorts[is.finite(unit_cohorts)])) + + # ----------------------------------------------------------------------- + # 9. cohort_masks: named list, one logical vector per cohort + # ----------------------------------------------------------------------- + cohort_masks <- vector("list", length(treatment_groups)) + names(cohort_masks) <- as.character(treatment_groups) + for (g_val in treatment_groups) { + cohort_masks[[as.character(g_val)]] <- (unit_cohorts == g_val) + } + + # ----------------------------------------------------------------------- + # 10. never_treated_mask + # ----------------------------------------------------------------------- + never_treated_mask <- is.infinite(unit_cohorts) + + # ----------------------------------------------------------------------- + # 11. cohort_fractions: pi_g = n_g / n + # ----------------------------------------------------------------------- + cohort_fractions <- stats::setNames( + vapply(treatment_groups, function(g_val) sum(unit_cohorts == g_val) / n, + numeric(1L)), + as.character(treatment_groups) + ) + + # ----------------------------------------------------------------------- + # 12. Clustering + # ----------------------------------------------------------------------- + cluster_indices <- NULL + n_clusters <- NULL + if (!is.null(clustervars)) { + cluster_indices <- build_cluster_index(dt, idname, clustervars, all_units) + n_clusters <- length(unique(cluster_indices)) + } + + # ----------------------------------------------------------------------- + # 13. Covariate matrix extraction + # ----------------------------------------------------------------------- + covariate_matrix <- NULL + is_trivial_xformla <- is.null(xformla) || + identical(deparse(xformla, width.cutoff = 500L), "~1") + + if (!is_trivial_xformla) { + rhs_vars <- all.vars(xformla) + if (length(rhs_vars) > 0L) { + # Extract one row per unit (time-invariant covariates enforced by validation) + # Use the first time period for each unit + first_rows <- match(all_units, dt[[idname]]) + cov_df <- as.data.frame(dt)[first_rows, , drop = FALSE] + + # Use model.matrix() to properly expand the formula + # This handles I(), interactions, poly(), factors via dummy coding + mm <- stats::model.matrix(xformla, data = cov_df) + + # Remove intercept column if present (estimator handles centering) + intercept_col <- which(colnames(mm) == "(Intercept)") + if (length(intercept_col) > 0L) { + mm <- mm[, -intercept_col, drop = FALSE] + } + + covariate_matrix <- unname(mm) + rownames(covariate_matrix) <- NULL + } + } + + # ----------------------------------------------------------------------- + # Assemble panel_obj + # ----------------------------------------------------------------------- + panel_obj <- list( + n = n, + T_periods = T_periods, + outcome_wide = outcome_wide, + time_periods = time_periods, + period_1 = period_1, + period_to_col = period_to_col, + all_units = all_units, + unit_cohorts = unit_cohorts, + treatment_groups = treatment_groups, + cohort_masks = cohort_masks, + never_treated_mask = never_treated_mask, + cohort_fractions = cohort_fractions, + cluster_indices = cluster_indices, + n_clusters = n_clusters, + covariate_matrix = covariate_matrix, + xformla = xformla, + control_group = control_group, + anticipation = as.integer(anticipation) + ) + + panel_obj +} + +#' Build cluster integer index from cluster id column +#' +#' @param dt data.table (long format), sorted by unit then time +#' @param idname character scalar: unit id column name +#' @param clustervars character scalar: cluster id column name +#' @param all_units sorted vector of unique unit ids +#' +#' @return integer vector length n (values 1..G) +#' @keywords internal +build_cluster_index <- function(dt, idname, clustervars, all_units) { + # Extract time-invariant cluster id per unit using base R tapply + cl_vals <- dt[[clustervars]] + unit_id_vals <- dt[[idname]] + cl_map <- tapply(cl_vals, unit_id_vals, function(x) x[1L]) + cl_ids <- cl_map[match(all_units, names(cl_map))] + + # Map cluster id -> integer index + sorted_cl <- sort(unique(cl_ids)) + cl_index <- match(cl_ids, sorted_cl) + cl_index +} diff --git a/R/edid-fit.R b/R/edid-fit.R new file mode 100644 index 0000000..77d5745 --- /dev/null +++ b/R/edid-fit.R @@ -0,0 +1,253 @@ +# edid-fit.R +# Outer (g, t) cell loop for the EDiD estimator. + +#' Fit all (g, t) cells for the EDiD estimator +#' +#' Iterates over all treatment cohorts and all time periods (excluding +#' \code{period_1}), computes point estimates, EIFs, and analytical SEs for +#' each cell. +#' +#' @param panel_obj panel object from \code{prepare_edid_panel()} +#' @param pt_assumption character: \code{"all"} or \code{"post"} +#' @param alpha significance level in (0, 1) +#' @param store_eif logical: if TRUE, include EIF vectors in returned cells +#' @param xformla one-sided formula or NULL: covariate formula (routed to +#' covariate path when non-trivial and \code{panel_obj$covariate_matrix} +#' is non-NULL) +#' @param need_eif logical: if TRUE, always store EIF regardless of store_eif +#' (used internally when \code{n_bootstrap > 0}) +#' +#' @return list with elements: +#' \describe{ +#' \item{\code{cells}}{list of \code{edid_cell_result} objects} +#' \item{\code{eif_matrix}}{n x n_valid_cells numeric matrix, or NULL} +#' \item{\code{cell_index}}{data.frame: group, time, cell_id, is_pre} +#' } +#' @keywords internal +fit_edid_cells <- function( + panel_obj, pt_assumption, alpha, store_eif, xformla = NULL, seed = NULL, + need_eif = FALSE +) { + # Determine if covariate path is active + is_trivial_xformla <- is.null(xformla) || + identical(deparse(xformla, width.cutoff = 500L), "~1") + use_cov_path <- !is_trivial_xformla && !is.null(panel_obj$covariate_matrix) + + # Cross-fitting fold assignment (global, reused across all cells) + if (use_cov_path) { + fold_id <- build_crossfit_folds_edid(n = panel_obj$n, K = 5L, seed = seed) + } else { + fold_id <- NULL + } + + tgroups <- panel_obj$treatment_groups + tperiods <- panel_obj$time_periods + period_1 <- panel_obj$period_1 + n <- panel_obj$n + + # Periods to iterate over: all except period_1 + iter_periods <- tperiods[tperiods != period_1] + + # Pre-allocate cell list + n_cells <- length(tgroups) * length(iter_periods) + cells <- vector("list", n_cells) + + # cell_index tracking + ci_group <- numeric(n_cells) + ci_time <- numeric(n_cells) + ci_cell_id <- integer(n_cells) + ci_is_pre <- logical(n_cells) + + keep_eif <- store_eif || need_eif + eif_list <- if (keep_eif) vector("list", n_cells) else NULL + + cell_id <- 0L + n_extreme_ratio_instances <- 0L # accumulate extreme-ratio warnings; emit once at end + + for (g in tgroups) { + for (t in iter_periods) { + cell_id <- cell_id + 1L + is_pre <- (t < g) + + # Step 1: enumerate valid pairs + pairs <- enumerate_valid_pairs_edid( + target_g = g, + treatment_groups = tgroups, + time_periods = tperiods, + period_1 = period_1, + pt_assumption = pt_assumption, + anticipation = panel_obj$anticipation + ) + + # NA cell if no valid pairs + if (nrow(pairs) == 0L) { + cells[[cell_id]] <- list( + group = g, + time = t, + att = NA_real_, + se = NA_real_, + ci_lower = NA_real_, + ci_upper = NA_real_, + t_stat = NA_real_, + p_value = NA_real_, + n_pairs = 0L, + weights = NULL, + condition_num = NA_real_, + is_pre = is_pre, + inference_valid = FALSE, + eif = NULL + ) + ci_group[cell_id] <- g + ci_time[cell_id] <- t + ci_cell_id[cell_id] <- cell_id + ci_is_pre[cell_id] <- is_pre + if (keep_eif) eif_list[[cell_id]] <- rep(NA_real_, n) + next + } + + # Covariate nuisance estimates (per cell) + prop_ratios <- NULL + cond_means <- NULL + + if (use_cov_path) { + # Build nuisance estimation pairs. + # For Eq. (4.4), we need nuisances for: + # - r[g, Inf] always (never-treated propensity ratio) + # - r[g, gp] for each cross-cohort gp + # - m_{Inf, t} and m_{Inf, tpre} for never-treated conditional means + # - m_{gp, tpre} for each cross-cohort gp's pretrend + # Build pairs_for_nuisance that includes Inf + all cross-cohort gps + pairs_for_nuisance <- pairs + # Self-comparison pairs use Inf as comparison + self_cmp <- is.finite(pairs_for_nuisance$gp) & (pairs_for_nuisance$gp == g) + if (any(self_cmp)) { + pairs_for_nuisance$gp[self_cmp] <- Inf + } + # Ensure Inf is always present for never-treated nuisances + # (needed even for cross-cohort pairs) + cross_pairs <- pairs[is.finite(pairs$gp) & pairs$gp != g, , drop = FALSE] + if (nrow(cross_pairs) > 0L) { + # Add Inf-based pairs for the same tpre values (for m_{Inf,tpre}) + inf_pairs <- data.frame(gp = Inf, tpre = unique(cross_pairs$tpre)) + pairs_for_nuisance <- rbind(pairs_for_nuisance, inf_pairs) + pairs_for_nuisance <- unique(pairs_for_nuisance) + } + + prop_ratios <- withCallingHandlers( + estimate_all_propensity_ratios( + panel_obj = panel_obj, + g = g, + pairs = pairs_for_nuisance, + bs_df = 4L, + K_folds = 5L, + fold_id = fold_id + ), + warning = function(w) { + if (grepl("Extreme propensity ratios", conditionMessage(w), fixed = TRUE)) { + n_extreme_ratio_instances <<- n_extreme_ratio_instances + 1L + invokeRestart("muffleWarning") + } + } + ) + cond_means <- estimate_all_conditional_means( + panel_obj = panel_obj, + pairs = pairs_for_nuisance, + t_val = t, + bs_df = 4L, + K_folds = 5L, + fold_id = fold_id + ) + } + + # Steps 2-6: dispatch on covariate vs. no-covariate path + if (use_cov_path) { + # --- Covariate path --- + gen_out_mat <- compute_generated_outcomes_cov_edid( + panel_obj = panel_obj, + g = g, + t = t, + pairs = pairs, + prop_ratios = prop_ratios, + cond_means = cond_means, + pt_assumption = pt_assumption + ) + omega <- compute_omega_star_cov_edid(panel_obj, g, t, pairs, + prop_ratios, cond_means) + cond_num <- tryCatch(check_condition_edid(omega), error = function(e) NA_real_) + weights <- compute_efficient_weights_edid(omega) + # ATT: weighted mean of column means of gen_out_mat + col_means <- colMeans(gen_out_mat, na.rm = TRUE) + att_gt <- sum(weights * col_means) + eif_gt <- compute_eif_cov_edid(panel_obj, gen_out_mat, weights, att_gt, g) + } else { + # --- No-covariate path --- + y_hat <- compute_generated_outcomes_nocov_edid(g, t, pairs, panel_obj, pt_assumption) + omega <- compute_omega_star_nocov_edid(g, t, pairs, panel_obj, pt_assumption) + cond_num <- tryCatch(check_condition_edid(omega), error = function(e) NA_real_) + weights <- compute_efficient_weights_edid(omega) + att_gt <- sum(weights * y_hat) + eif_gt <- compute_eif_nocov_edid(g, t, pairs, weights, panel_obj, att_gt, pt_assumption) + } + + # Step 7: SE and inference + inf_res <- safe_inference_edid(eif_gt, panel_obj$cluster_indices, alpha, att_gt) + + # Step 8: store + eif_store <- if (keep_eif) eif_gt else NULL + cells[[cell_id]] <- list( + group = g, + time = t, + att = att_gt, + se = inf_res$se, + ci_lower = inf_res$ci_lower, + ci_upper = inf_res$ci_upper, + t_stat = inf_res$t_stat, + p_value = inf_res$p_value, + n_pairs = nrow(pairs), + weights = weights, + condition_num = cond_num, + is_pre = is_pre, + inference_valid = inf_res$inference_valid, + eif = eif_store + ) + + ci_group[cell_id] <- g + ci_time[cell_id] <- t + ci_cell_id[cell_id] <- cell_id + ci_is_pre[cell_id] <- is_pre + + if (keep_eif) eif_list[[cell_id]] <- eif_gt + } + } + + if (n_extreme_ratio_instances > 0L) { + warning(sprintf( + "Extreme propensity ratios detected (max > 100) in %d estimation step(s). Results may be unstable.", + n_extreme_ratio_instances + )) + } + + # Build EIF matrix if needed + eif_matrix <- NULL + if (keep_eif) { + # Stack EIF vectors as columns: n x n_cells matrix + eif_matrix <- do.call(cbind, eif_list) + if (is.null(dim(eif_matrix))) { + eif_matrix <- matrix(eif_matrix, nrow = n) + } + } + + cell_index <- data.frame( + group = ci_group, + time = ci_time, + cell_id = ci_cell_id, + is_pre = ci_is_pre, + stringsAsFactors = FALSE + ) + + list( + cells = cells, + eif_matrix = eif_matrix, + cell_index = cell_index + ) +} diff --git a/R/edid-imports.R b/R/edid-imports.R new file mode 100644 index 0000000..29f7d35 --- /dev/null +++ b/R/edid-imports.R @@ -0,0 +1,11 @@ +# edid-imports.R +# @importFrom declarations for symbols used in edid-*.R files that are not +# already declared in imports.R. +# +# Note: stats::pnorm, stats::qnorm, stats::quantile, stats::sd, stats::setNames +# are already declared in imports.R. We add only symbols NOT already present. +# data.table is @imported (not just @importFrom) in imports.R, so dcast is +# available without an additional entry, but we add explicit entries for clarity. + +#' @importFrom stats sd quantile as.formula +NULL diff --git a/R/edid-inference.R b/R/edid-inference.R new file mode 100644 index 0000000..653123b --- /dev/null +++ b/R/edid-inference.R @@ -0,0 +1,102 @@ +# edid-inference.R +# Analytical standard error and inference helpers for the EDiD estimator. + +#' Safely compute SE, CI, and p-value from an EIF vector +#' +#' Dispatches to \code{compute_eif_se_edid()} with optional cluster aggregation. +#' If the resulting SE is not valid (zero, NA, or non-finite), all inference +#' results are set to \code{NA}. +#' +#' @param eif numeric vector length n (or NULL, for NA cells) +#' @param cluster_indices integer vector length n (1..G) or NULL +#' @param alpha significance level in (0, 1) +#' @param att scalar ATT estimate (used for t-stat; may be NA for inference check) +#' +#' @return named list: +#' \code{se}, \code{ci_lower}, \code{ci_upper}, \code{t_stat}, +#' \code{p_value}, \code{inference_valid} +#' @keywords internal +safe_inference_edid <- function(eif, cluster_indices = NULL, alpha = 0.05, + att = NA_real_) { + na_result <- list( + se = NA_real_, + ci_lower = NA_real_, + ci_upper = NA_real_, + t_stat = NA_real_, + p_value = NA_real_, + inference_valid = FALSE + ) + + if (is.null(eif) || !is.numeric(eif) || length(eif) == 0L) { + return(na_result) + } + + if (is.null(cluster_indices)) { + n <- length(eif) + se <- compute_eif_se_edid(eif, n) + } else { + G <- length(unique(cluster_indices)) + cluster_sums <- drop(rowsum(eif, cluster_indices)) + se <- sqrt((G / (G - 1)) * sum(cluster_sums^2) / (length(eif)^2)) + } + + valid_se <- is.finite(se) && se > EDID_SE_EPS + + if (!valid_se) return(na_result) + + # att must be finite for CIs and p-value to be meaningful. + # If att is NA/non-finite, return the SE but mark inference as invalid + # (inference_valid = FALSE signals that CIs and p-value are not available). + if (!is.finite(att)) { + return(list( + se = se, + ci_lower = NA_real_, + ci_upper = NA_real_, + t_stat = NA_real_, + p_value = NA_real_, + inference_valid = FALSE + )) + } + + z_crit <- qnorm(1 - alpha / 2) + t_stat <- att / se + p_value <- 2 * pnorm(-abs(t_stat)) + + list( + se = se, + ci_lower = att - z_crit * se, + ci_upper = att + z_crit * se, + t_stat = t_stat, + p_value = p_value, + inference_valid = TRUE + ) +} + +#' Compute SE from EIF vector +#' +#' \deqn{SE = \sqrt{\sum_i \text{eif}_i^2 / n^2}} +#' +#' @param eif_vec numeric vector (may be cluster-aggregated sums) +#' @param n integer denominator (number of units or clusters) +#' +#' @return scalar SE +#' @keywords internal +compute_eif_se_edid <- function(eif_vec, n) { + sqrt(sum(eif_vec^2) / (n^2)) +} + +#' Aggregate EIF to cluster level (centered) +#' +#' Returns the vector of cluster sums of \code{eif}, mean-subtracted. +#' The small-sample correction \eqn{G/(G-1)} is applied in the SE formula +#' (in \code{safe_inference_edid}), not here. +#' +#' @param eif numeric vector length n +#' @param cluster_indices integer vector length n (values 1..G) +#' +#' @return numeric vector length G (cluster sums, centered) +#' @keywords internal +cluster_aggregate_edid <- function(eif, cluster_indices) { + cluster_sums <- drop(rowsum(eif, cluster_indices)) # length G + cluster_sums - mean(cluster_sums) +} diff --git a/R/edid-linalg.R b/R/edid-linalg.R new file mode 100644 index 0000000..cfa2309 --- /dev/null +++ b/R/edid-linalg.R @@ -0,0 +1,74 @@ +# edid-linalg.R +# Linear algebra helpers for the EDiD estimator. +# Uses base R svd() for pseudoinverse -- no MASS dependency. + +#' SVD-based Moore-Penrose pseudoinverse +#' +#' @param mat numeric matrix +#' @param tol tolerance for zero singular values; defaults to +#' \code{max(dim(mat)) * max(svd$d) * .Machine$double.eps} +#' @return matrix of same dimensions as \code{t(mat)} +#' @keywords internal +compute_pseudoinverse_edid <- function(mat, tol = NULL) { + s <- svd(mat) + d <- s$d + if (is.null(tol)) { + tol <- max(dim(mat)) * max(c(d, 0)) * .Machine$double.eps + } + # Zero out singular values below tolerance + d_inv <- ifelse(d > tol, 1 / d, 0) + # Pseudoinverse: V diag(d_inv) U' + s$v %*% diag(d_inv, nrow = length(d_inv)) %*% t(s$u) +} + +#' Condition number of a matrix via SVD +#' +#' @param mat numeric matrix +#' @return scalar: max singular value / min positive singular value. +#' Returns \code{Inf} if min singular value is 0. +#' @keywords internal +check_condition_edid <- function(mat) { + d <- svd(mat, nu = 0L, nv = 0L)$d + if (length(d) == 0L || max(d) == 0) return(Inf) + min_pos <- min(d[d > 0]) + if (length(min_pos) == 0L) return(Inf) + max(d) / min_pos +} + +#' Weighted OLS helper +#' +#' Computes \eqn{\hat\beta = (X'WX)^{-1} X'Wy} using \code{.lm.fit()}. +#' Falls back to SVD-based pseudoinverse if the normal equations are +#' numerically singular. +#' +#' @param X numeric matrix (n x p) +#' @param y numeric vector length n +#' @param weights numeric vector length n (NULL = uniform) +#' @return named list with elements \code{coef}, \code{fitted}, \code{residuals} +#' @keywords internal +solve_ols_edid <- function(X, y, weights = NULL) { + n <- nrow(X) + if (is.null(weights)) { + weights <- rep(1, n) + } + W <- sqrt(weights) + Xw <- X * W + yw <- y * W + fit <- tryCatch( + .lm.fit(Xw, yw), + error = function(e) NULL + ) + if (!is.null(fit) && all(is.finite(fit$coefficients))) { + beta <- fit$coefficients + yhat <- drop(X %*% beta) + resid <- y - yhat + return(list(coef = beta, fitted = yhat, residuals = resid)) + } + # Fallback: pseudoinverse + XtWX <- t(Xw) %*% Xw + XtWy <- t(Xw) %*% yw + beta <- drop(compute_pseudoinverse_edid(XtWX) %*% XtWy) + yhat <- drop(X %*% beta) + resid <- y - yhat + list(coef = beta, fitted = yhat, residuals = resid) +} diff --git a/R/edid-methods.R b/R/edid-methods.R new file mode 100644 index 0000000..60f66f3 --- /dev/null +++ b/R/edid-methods.R @@ -0,0 +1,324 @@ +# edid-methods.R +# S3 methods for the edid_fit class. + +#' Print method for edid_fit objects +#' +#' Displays the ATT(g,t) table in the same style as \code{print.MP} / +#' \code{summary.MP}, followed by footer metadata. +#' +#' @param x an \code{edid_fit} object +#' @param ... additional arguments (currently ignored) +#' +#' @return \code{x} invisibly +#' @export +print.edid_fit <- function(x, ...) { + cat("\n") + cat("Call:\n") + print(x$call) + cat("\n") + + cat("Group-Time Average Treatment Effects:\n") + + alp <- x$alpha + # CI label: pointwise for analytical; simult if bootstrap + cband_text1a <- paste0(100 * (1 - alp), "% ") + cband_text1b <- ifelse(isTRUE(x$bstrap), "Simult. ", "Pointwise ") + cband_text1 <- paste0("[", cband_text1a, cband_text1b) + + att_df <- x$att_gt + if (!is.null(att_df) && nrow(att_df) > 0L) { + ci_lower <- att_df$ci_lower + ci_upper <- att_df$ci_upper + + sig <- (ci_upper < 0) | (ci_lower > 0) + sig[is.na(sig)] <- FALSE + sig_text <- ifelse(sig, "*", "") + + out <- cbind.data.frame( + att_df$group, + att_df$time, + att_df$att, + att_df$se, + ci_lower, + ci_upper + ) + out <- round(out, 4) + out <- cbind.data.frame(out, sig_text) + colnames(out) <- c("Group", "Time", "ATT(g,t)", "Std. Error", + cband_text1, "Conf. Band]", "") + print(out, row.names = FALSE) + } else { + cat(" (no cells)\n") + } + + cat("---\n") + cat("Signif. codes: `*' confidence band does not cover 0") + cat("\n\n") + + # Control group footer + cg <- x$control_group + cg_text <- if (cg == "nevertreated") "Never Treated" else + if (cg == "notyettreated") "Not Yet Treated" else cg + cat("Control Group: ") + cat(cg_text) + cat(", ") + cat("Anticipation Periods: ") + cat(x$anticipation) + cat("\n") + + cat("Estimation Method: Efficient DiD (Chen, Sant'Anna & Xie 2025)\n") + + pt_text <- if (x$pt_assumption == "all") "PT-All" else "PT-Post" + cat("PT Assumption: ") + cat(pt_text) + cat("\n") + + invisible(x) +} + +#' Summary method for edid_fit objects +#' +#' Prints a structured summary of the EDiD estimation results including +#' the ATT(g,t) table (in MP style) plus aggregated overall, event-study, +#' and group estimates. +#' +#' @param object an \code{edid_fit} object +#' @param ... additional arguments (currently ignored) +#' +#' @return \code{object} invisibly +#' @export +summary.edid_fit <- function(object, ...) { + # First, print the ATT(g,t) table in MP format + print.edid_fit(object, ...) + + # Overall ATT + if (!is.null(object$overall)) { + cat("\n--- Overall ATT ---\n") + ov <- object$overall + cat(sprintf(" ATT = %.4f SE = %s CI = [%s, %s] p = %s\n", + ov$att, + .fmt_or_na(ov$se), + .fmt_or_na(ov$ci_lower), + .fmt_or_na(ov$ci_upper), + .fmt_or_na(ov$p_value))) + } + + # Event-study + if (!is.null(object$event_study) && length(object$event_study) > 0L) { + cat("\n--- Event-Study ATT(e) ---\n") + for (es in object$event_study) { + cat(sprintf(" e = %3g: ATT = %.4f SE = %s p = %s\n", + es$e, + es$att, + .fmt_or_na(es$se), + .fmt_or_na(es$p_value))) + } + } + + # Group + if (!is.null(object$group) && length(object$group) > 0L) { + cat("\n--- Group ATT(g) ---\n") + for (gr in object$group) { + cat(sprintf(" g = %g: ATT = %.4f SE = %s p = %s\n", + gr$group, + gr$att, + .fmt_or_na(gr$se), + .fmt_or_na(gr$p_value))) + } + } + + cat("\n") + invisible(object) +} + +# Internal formatting helper +.fmt_or_na <- function(x) { + if (is.null(x) || !is.finite(x)) "NA" else sprintf("%.4f", x) +} + +#' Extract ATT coefficients from an edid_fit object +#' +#' @param object an \code{edid_fit} object +#' @param which character: one of \code{"att_gt"}, \code{"overall"}, +#' \code{"event_study"}, \code{"group"} +#' @param ... additional arguments (ignored) +#' +#' @return named numeric vector of ATT estimates +#' @export +coef.edid_fit <- function( + object, + which = c("att_gt", "overall", "event_study", "group"), + ... +) { + which <- match.arg(which) + switch(which, + att_gt = { + df <- object$att_gt + nms <- paste0("ATT(", df$group, ",", df$time, ")") + stats::setNames(df$att, nms) + }, + overall = { + c(overall = object$overall$att) + }, + event_study = { + vals <- vapply(object$event_study, function(x) x$att, numeric(1L)) + es <- object$event_study + nms <- vapply(es, function(x) paste0("e=", x$e), character(1L)) + stats::setNames(vals, nms) + }, + group = { + vals <- vapply(object$group, function(x) x$att, numeric(1L)) + nms <- vapply(object$group, function(x) paste0("g=", x$group), character(1L)) + stats::setNames(vals, nms) + } + ) +} + +#' Extract variance-covariance matrix from an edid_fit object +#' +#' Returns the outer product of aggregated EIF vectors, scaled by \eqn{1/n^2}. +#' When bootstrap inference is used, returns a diagonal matrix of bootstrap +#' variance estimates. +#' +#' @param object an \code{edid_fit} object +#' @param which character: one of \code{"att_gt"}, \code{"overall"}, +#' \code{"event_study"}, \code{"group"} +#' @param ... additional arguments (ignored) +#' +#' @return square numeric matrix +#' @export +vcov.edid_fit <- function( + object, + which = c("att_gt", "overall", "event_study", "group"), + ... +) { + which <- match.arg(which) + n <- object$n + + if (which == "overall") { + eif_v <- object$overall$eif_agg + if (is.null(eif_v)) return(matrix(NA_real_, 1L, 1L)) + return(matrix(sum(eif_v^2) / n^2, nrow = 1L, ncol = 1L, + dimnames = list("overall", "overall"))) + } + + if (which == "event_study") { + es_list <- object$event_study + if (is.null(es_list) || length(es_list) == 0L) return(matrix(NA_real_, 0L, 0L)) + nms <- vapply(es_list, function(x) paste0("e=", x$e), character(1L)) + K <- length(es_list) + vcv <- matrix(NA_real_, K, K, dimnames = list(nms, nms)) + for (j in seq_len(K)) { + eif_j <- es_list[[j]]$eif_agg + if (is.null(eif_j)) next + for (k in seq_len(K)) { + eif_k <- es_list[[k]]$eif_agg + if (is.null(eif_k)) next + vcv[j, k] <- sum(eif_j * eif_k) / n^2 + } + } + return(vcv) + } + + if (which == "group") { + gr_list <- object$group + if (is.null(gr_list) || length(gr_list) == 0L) return(matrix(NA_real_, 0L, 0L)) + nms <- vapply(gr_list, function(x) paste0("g=", x$group), character(1L)) + K <- length(gr_list) + vcv <- matrix(NA_real_, K, K, dimnames = list(nms, nms)) + for (j in seq_len(K)) { + eif_j <- gr_list[[j]]$eif_agg + if (is.null(eif_j)) next + for (k in seq_len(K)) { + eif_k <- gr_list[[k]]$eif_agg + if (is.null(eif_k)) next + vcv[j, k] <- sum(eif_j * eif_k) / n^2 + } + } + return(vcv) + } + + # att_gt: use cell-level EIFs from eif_matrix if available + if (!is.null(object$eif)) { + eif_mat <- object$eif + K <- ncol(eif_mat) + vcv <- (t(eif_mat) %*% eif_mat) / n^2 + df <- object$att_gt + nms <- paste0("ATT(", df$group, ",", df$time, ")") + dimnames(vcv) <- list(nms, nms) + return(vcv) + } + # Fallback: diagonal from stored SEs + df <- object$att_gt + nms <- paste0("ATT(", df$group, ",", df$time, ")") + K <- nrow(df) + vcv <- diag(df$se^2, nrow = K) + dimnames(vcv) <- list(nms, nms) + vcv +} + +#' Coerce edid_fit to a data.frame +#' +#' @param x an \code{edid_fit} object +#' @param row.names ignored; included for S3 generic consistency +#' @param optional ignored; included for S3 generic consistency +#' @param ... not used; reserved for \code{which} (see Details) +#' @param which character: one of \code{"att_gt"}, \code{"overall"}, +#' \code{"event_study"}, \code{"group"} +#' +#' @return data.frame +#' @export +as.data.frame.edid_fit <- function( + x, + row.names = NULL, + optional = FALSE, + ..., + which = c("att_gt", "overall", "event_study", "group") +) { + which <- match.arg(which) + switch(which, + att_gt = { + x$att_gt + }, + overall = { + ov <- x$overall + data.frame( + att = ov$att, + se = ov$se, + ci_lower = ov$ci_lower, + ci_upper = ov$ci_upper, + t_stat = ov$t_stat, + p_value = ov$p_value, + stringsAsFactors = FALSE + ) + }, + event_study = { + es_list <- x$event_study + if (is.null(es_list) || length(es_list) == 0L) { + return(data.frame(e = numeric(0L), att = numeric(0L), se = numeric(0L), + ci_lower = numeric(0L), ci_upper = numeric(0L), + p_value = numeric(0L))) + } + do.call(rbind, lapply(es_list, function(es) { + data.frame(e = es$e, att = es$att, se = es$se, + ci_lower = es$ci_lower, ci_upper = es$ci_upper, + t_stat = es$t_stat, p_value = es$p_value, + stringsAsFactors = FALSE) + })) + }, + group = { + gr_list <- x$group + if (is.null(gr_list) || length(gr_list) == 0L) { + return(data.frame(group = numeric(0L), att = numeric(0L), se = numeric(0L), + ci_lower = numeric(0L), ci_upper = numeric(0L), + p_value = numeric(0L))) + } + do.call(rbind, lapply(gr_list, function(gr) { + data.frame(group = gr$group, att = gr$att, se = gr$se, + ci_lower = gr$ci_lower, ci_upper = gr$ci_upper, + t_stat = gr$t_stat, p_value = gr$p_value, + stringsAsFactors = FALSE) + })) + } + ) +} diff --git a/R/edid-nocov.R b/R/edid-nocov.R new file mode 100644 index 0000000..9042e9d --- /dev/null +++ b/R/edid-nocov.R @@ -0,0 +1,327 @@ +# edid-nocov.R +# No-covariate path for the EDiD estimator: +# compute_omega_star_nocov_edid() +# compute_efficient_weights_edid() +# compute_generated_outcomes_nocov_edid() +# compute_eif_nocov_edid() + +# --------------------------------------------------------------------------- +# Helper: get column index from panel_obj +# --------------------------------------------------------------------------- +.col <- function(panel_obj, period_val) { + panel_obj$period_to_col[[as.character(period_val)]] +} + +# --------------------------------------------------------------------------- +# Omega* covariance matrix (H x H) +# --------------------------------------------------------------------------- + +#' Compute the Omega* covariance matrix for the no-covariate EDiD path +#' +#' Builds the \eqn{H \times H} sample covariance matrix of the identifying +#' moments for cell \code{(target_g, target_t)}. +#' +#' @param target_g scalar cohort value +#' @param target_t scalar time period +#' @param pairs data.frame with columns \code{gp} and \code{tpre}; H rows +#' @param panel_obj panel object from \code{prepare_edid_panel()} +#' @param pt_assumption \code{"all"} or \code{"post"} +#' +#' @return numeric matrix H x H +#' @keywords internal +compute_omega_star_nocov_edid <- function( + target_g, target_t, pairs, panel_obj, pt_assumption +) { + H <- nrow(pairs) + n <- panel_obj$n + ow <- panel_obj$outcome_wide + + mask_g <- panel_obj$cohort_masks[[as.character(target_g)]] + mask_inf <- panel_obj$never_treated_mask + n_g <- sum(mask_g) + n_inf <- sum(mask_inf) + + col_t <- .col(panel_obj, target_t) + col_1 <- .col(panel_obj, panel_obj$period_1) + + if (pt_assumption == "post") { + # PT-Post: 1x1 matrix = var of standard DiD moment + tpre_val <- pairs$tpre[1L] + col_base <- .col(panel_obj, tpre_val) + + delta_g <- ow[mask_g, col_t] - ow[mask_g, col_base] + delta_inf <- ow[mask_inf, col_t] - ow[mask_inf, col_base] + + omega <- matrix( + cov_nn_edid(delta_g, delta_g) / n_g + + cov_nn_edid(delta_inf, delta_inf) / n_inf, + nrow = 1L, ncol = 1L + ) + return(omega) + } + + # --------------------------------------------------------------------------- + # PT-All: H x H matrix, entry-by-entry + # --------------------------------------------------------------------------- + # Pre-compute treated-group change (same for all j, k) + delta_g_t_1 <- ow[mask_g, col_t] - ow[mask_g, col_1] + + # Pre-compute never-treated changes for each unique tpre + unique_tpre <- unique(pairs$tpre) + delta_inf_cache <- vector("list", length(unique_tpre)) + names(delta_inf_cache) <- as.character(unique_tpre) + for (tp in unique_tpre) { + col_pre <- .col(panel_obj, tp) + delta_inf_cache[[as.character(tp)]] <- + ow[mask_inf, col_t] - ow[mask_inf, col_pre] + } + + # Pre-compute comparison-cohort changes for each unique (gp, tpre) + unique_gp_tpre <- unique(pairs[, c("gp", "tpre")]) + delta_gp_cache <- list() + for (rr in seq_len(nrow(unique_gp_tpre))) { + gp_val <- unique_gp_tpre$gp[rr] + tp_val <- unique_gp_tpre$tpre[rr] + key <- paste0(gp_val, "_", tp_val) + mask_gp <- panel_obj$cohort_masks[[as.character(gp_val)]] + col_pre <- .col(panel_obj, tp_val) + delta_gp_cache[[key]] <- ow[mask_gp, col_pre] - ow[mask_gp, col_1] + } + + omega <- matrix(0, nrow = H, ncol = H) + + for (j in seq_len(H)) { + gp_j <- pairs$gp[j] + tpre_j <- pairs$tpre[j] + key_j <- paste0(gp_j, "_", tpre_j) + n_gp_j <- sum(panel_obj$cohort_masks[[as.character(gp_j)]]) + delta_inf_j <- delta_inf_cache[[as.character(tpre_j)]] + delta_gp_j <- delta_gp_cache[[key_j]] + + for (k in seq_len(H)) { + if (k < j) { + omega[j, k] <- omega[k, j] # symmetric + next + } + gp_k <- pairs$gp[k] + tpre_k <- pairs$tpre[k] + key_k <- paste0(gp_k, "_", tpre_k) + n_gp_k <- sum(panel_obj$cohort_masks[[as.character(gp_k)]]) + delta_inf_k <- delta_inf_cache[[as.character(tpre_k)]] + delta_gp_k <- delta_gp_cache[[key_k]] + + # Term A: treated group variance (always present; same for all j, k) + term_a <- cov_nn_edid(delta_g_t_1, delta_g_t_1) / n_g + + # Term B: never-treated cross-covariance + term_b <- cov_nn_edid(delta_inf_j, delta_inf_k) / n_inf + + # Term C_j: non-zero only if gp_j == target_g + term_cj <- 0 + if (is.finite(gp_j) && gp_j == target_g) { + term_cj <- cov_nn_edid(delta_g_t_1, delta_gp_j) / n_g + } + + # Term C_k: non-zero only if gp_k == target_g + term_ck <- 0 + if (is.finite(gp_k) && gp_k == target_g) { + term_ck <- cov_nn_edid(delta_g_t_1, delta_gp_k) / n_g + } + + # Term D: non-zero only if gp_j == gp_k + term_d <- 0 + if (gp_j == gp_k) { # works for both finite and Inf + term_d <- cov_nn_edid(delta_gp_j, delta_gp_k) / n_gp_j + } + + omega[j, k] <- term_a + term_b - term_cj - term_ck + term_d + } + } + + omega +} + +# --------------------------------------------------------------------------- +# Efficient weights +# --------------------------------------------------------------------------- + +#' Compute efficient inverse-covariance weights +#' +#' Implements \eqn{w = (\Omega^{*-1} \mathbf{1}) / (\mathbf{1}' \Omega^{*-1} \mathbf{1})} +#' with fallback to uniform weights when the matrix is degenerate. +#' +#' @param omega_star numeric matrix H x H +#' +#' @return numeric vector length H, summing to 1 +#' @keywords internal +compute_efficient_weights_edid <- function(omega_star) { + H <- nrow(omega_star) + ones_H <- rep(1, H) + unif <- ones_H / H + + # Degenerate: all zeros -> uniform + if (all(omega_star == 0)) return(unif) + + # Check condition number + kappa <- check_condition_edid(omega_star) + if (!is.finite(kappa) || kappa > EDID_COND_THRESH) { + inv_omega <- compute_pseudoinverse_edid(omega_star) + } else { + inv_omega <- tryCatch( + solve(omega_star), + error = function(e) compute_pseudoinverse_edid(omega_star) + ) + } + + num <- drop(inv_omega %*% ones_H) + denom <- sum(num) + if (!is.finite(denom) || abs(denom) < EDID_DENOM_EPS) return(unif) + num / denom +} + +# --------------------------------------------------------------------------- +# Generated outcomes (scalar moments) +# --------------------------------------------------------------------------- + +#' Compute generated-outcome scalars for each valid pair +#' +#' @param target_g scalar cohort value +#' @param target_t scalar time period +#' @param pairs data.frame with columns \code{gp} and \code{tpre}; H rows +#' @param panel_obj panel object from \code{prepare_edid_panel()} +#' @param pt_assumption \code{"all"} or \code{"post"} +#' +#' @return numeric vector length H +#' @keywords internal +compute_generated_outcomes_nocov_edid <- function( + target_g, target_t, pairs, panel_obj, pt_assumption +) { + H <- nrow(pairs) + ow <- panel_obj$outcome_wide + + mask_g <- panel_obj$cohort_masks[[as.character(target_g)]] + mask_inf <- panel_obj$never_treated_mask + + col_t <- .col(panel_obj, target_t) + + if (pt_assumption == "post") { + # PT-Post: one pair (Inf, base) + tpre_val <- pairs$tpre[1L] + col_base <- .col(panel_obj, tpre_val) + y_hat <- mean(ow[mask_g, col_t] - ow[mask_g, col_base]) - + mean(ow[mask_inf, col_t] - ow[mask_inf, col_base]) + return(y_hat) # scalar; will be treated as length-1 vector + } + + # PT-All + col_1 <- .col(panel_obj, panel_obj$period_1) + term_g <- mean(ow[mask_g, col_t] - ow[mask_g, col_1]) + + y_hat <- numeric(H) + for (j in seq_len(H)) { + gp_j <- pairs$gp[j] + tpre_j <- pairs$tpre[j] + col_pre <- .col(panel_obj, tpre_j) + + term_inf <- mean(ow[mask_inf, col_t] - ow[mask_inf, col_pre]) + + mask_gp <- panel_obj$cohort_masks[[as.character(gp_j)]] + term_gp <- mean(ow[mask_gp, col_pre] - ow[mask_gp, col_1]) + + y_hat[j] <- term_g - term_inf - term_gp + } + y_hat +} + +# --------------------------------------------------------------------------- +# Efficient Influence Function (per unit, length n) +# --------------------------------------------------------------------------- + +#' Compute the no-covariate efficient influence function for a (g, t) cell +#' +#' @param target_g scalar cohort value +#' @param target_t scalar time period +#' @param pairs data.frame with columns \code{gp} and \code{tpre}; H rows +#' @param weights numeric vector length H (efficient weights) +#' @param panel_obj panel object from \code{prepare_edid_panel()} +#' @param att_gt scalar ATT estimate for this cell +#' @param pt_assumption \code{"all"} or \code{"post"} +#' +#' @return numeric vector length n (zero-mean by construction) +#' @keywords internal +compute_eif_nocov_edid <- function( + target_g, target_t, pairs, weights, panel_obj, att_gt, pt_assumption +) { + n <- panel_obj$n + H <- nrow(pairs) + ow <- panel_obj$outcome_wide + + mask_g <- panel_obj$cohort_masks[[as.character(target_g)]] + mask_inf <- panel_obj$never_treated_mask + pi_g <- panel_obj$cohort_fractions[[as.character(target_g)]] + pi_inf <- sum(mask_inf) / n + + col_t <- .col(panel_obj, target_t) + + eif <- numeric(n) + + if (pt_assumption == "post") { + # PT-Post: one pair; base = g - 1 - anticipation + tpre_val <- pairs$tpre[1L] + col_base <- .col(panel_obj, tpre_val) + w_1 <- weights[1L] + + delta_g <- ow[mask_g, col_t] - ow[mask_g, col_base] + mean_g <- mean(delta_g) + eif[mask_g] <- eif[mask_g] + w_1 * (delta_g - mean_g) / pi_g + + delta_inf <- ow[mask_inf, col_t] - ow[mask_inf, col_base] + mean_inf <- mean(delta_inf) + eif[mask_inf] <- eif[mask_inf] - w_1 * (delta_inf - mean_inf) / pi_inf + + # gp_j == Inf: no comparison cohort term + } else { + # PT-All + col_1 <- .col(panel_obj, panel_obj$period_1) + col_base <- col_1 # base for treated group + + delta_g_t_base <- ow[mask_g, col_t] - ow[mask_g, col_base] + mean_g_t_base <- mean(delta_g_t_base) + + for (j in seq_len(H)) { + w_j <- weights[j] + gp_j <- pairs$gp[j] + tpre_j <- pairs$tpre[j] + col_pre <- .col(panel_obj, tpre_j) + + # Treated group contribution (always present) + # Y_hat_j enters via centering: phi_{ij} = I(G=g)/pi_g * (delta - Y_hat_j) + # But the EIF is: score - att_gt, and score = sum w_j phi_{ij} + # phi_{ij} for treated group = I(G=g)/pi_g * (delta_{g,t,base} - Y_hat_j) + # We compute: sum_j w_j * I(G=g)/pi_g * (delta - Y_hat_j) + # = I(G=g)/pi_g * [ (delta - mean_g) + (mean_g - sum_j w_j Y_hat_j) ] + # But it's simpler to accumulate directly: + eif[mask_g] <- eif[mask_g] + + w_j * (delta_g_t_base - mean_g_t_base) / pi_g + + # Never-treated contribution (subtract) + delta_inf_t_pre <- ow[mask_inf, col_t] - ow[mask_inf, col_pre] + mean_inf_t_pre <- mean(delta_inf_t_pre) + eif[mask_inf] <- eif[mask_inf] - + w_j * (delta_inf_t_pre - mean_inf_t_pre) / pi_inf + + # Comparison cohort contribution (subtract) + mask_gp <- panel_obj$cohort_masks[[as.character(gp_j)]] + pi_gp <- panel_obj$cohort_fractions[[as.character(gp_j)]] + delta_gp_pre_base <- ow[mask_gp, col_pre] - ow[mask_gp, col_base] + mean_gp_pre_base <- mean(delta_gp_pre_base) + eif[mask_gp] <- eif[mask_gp] - + w_j * (delta_gp_pre_base - mean_gp_pre_base) / pi_gp + } + } + + # The score accumulated above already has mean 0 by construction: + # each group's contribution is demeaned (delta_i - mean(delta)). + # The EIF is the score itself; do NOT subtract att_gt again. + eif +} diff --git a/R/edid-pairs.R b/R/edid-pairs.R new file mode 100644 index 0000000..4a59e59 --- /dev/null +++ b/R/edid-pairs.R @@ -0,0 +1,88 @@ +# edid-pairs.R +# Enumerate the set of valid comparison pairs H_gt for a target cell (g, t). + +#' Enumerate valid comparison pairs for a target (g, t) cell +#' +#' Constructs the set \eqn{H_{gt}} of valid \code{(gp, tpre)} pairs used to +#' form identifying DiD moments for cohort \code{target_g} at time \code{target_t}. +#' +#' Under \strong{PT-Post}: returns exactly one pair \code{(Inf, target_g - 1 - anticipation)}, +#' or a 0-row data.frame if that pre-period does not exist in \code{time_periods} or +#' equals \code{period_1}. +#' +#' Under \strong{PT-All}: iterates over treated cohorts \code{gp} only (the +#' never-treated group is the time control inside every moment, not a comparison +#' cohort). For \code{gp == target_g}: valid \code{tpre} are all periods strictly +#' less than \code{gp - anticipation}, including \code{period_1} (this is the +#' degenerate CS DiD moment whose comparison-cohort EIF term is identically zero). +#' For \code{gp != target_g}: valid \code{tpre} are periods strictly between +#' \code{period_1} and \code{gp - anticipation} (exclusive on both ends). +#' Returns a 0-row data.frame if no valid pairs exist (e.g., single cohort with +#' only one pre-period equal to \code{period_1}). +#' +#' @param target_g scalar: treatment cohort being estimated +#' @param treatment_groups sorted numeric vector of all finite cohort values +#' @param time_periods sorted numeric vector of all time periods in the panel +#' @param period_1 scalar: universal first period +#' @param pt_assumption character: \code{"all"} or \code{"post"} +#' @param anticipation integer >= 0 +#' @param never_treated_val value used to represent the never-treated cohort +#' (default \code{Inf}) +#' +#' @return data.frame with columns \code{gp} (comparison cohort) and +#' \code{tpre} (pre-period). May have 0 rows. +#' @keywords internal +enumerate_valid_pairs_edid <- function( + target_g, + treatment_groups, + time_periods, + period_1, + pt_assumption, + anticipation = 0L, + never_treated_val = Inf +) { + empty <- data.frame(gp = numeric(0L), tpre = numeric(0L)) + + if (pt_assumption == "post") { + # ----------------------------------------------------------------------- + # PT-Post: exactly one pair (Inf, g - 1 - anticipation) + # ----------------------------------------------------------------------- + tpre_val <- target_g - 1L - anticipation + if (!tpre_val %in% time_periods) return(empty) + if (tpre_val == period_1) return(empty) + return(data.frame(gp = never_treated_val, tpre = tpre_val)) + } + + # ------------------------------------------------------------------------- + # PT-All: loop over treated cohorts only (never-treated is NOT a comparison + # cohort; it appears only as the time control E[Y_inf(t)-Y_inf(tpre)] inside + # each moment). + # + # For g' == target_g: valid tpre = {s : s < eff_start(g')} + # -- INCLUDES period_1 (degenerate CS DiD moment; comparison EIF = 0) + # For g' != target_g: valid tpre = {s : period_1 < s < eff_start(g')} + # -- EXCLUDES period_1 (non-degenerate moments only) + # ------------------------------------------------------------------------- + out_gp <- numeric(0L) + out_tpre <- numeric(0L) + + for (gp in treatment_groups) { + eff_start <- gp - anticipation + if (gp == target_g) { + # Self-pair: include period_1 + valid_tpre <- time_periods[time_periods < eff_start] + } else { + # Cross-pair: exclude period_1 + valid_tpre <- time_periods[ + time_periods > period_1 & time_periods < eff_start + ] + } + if (length(valid_tpre) > 0L) { + out_gp <- c(out_gp, rep(gp, length(valid_tpre))) + out_tpre <- c(out_tpre, valid_tpre) + } + } + + if (length(out_gp) == 0L) return(empty) + data.frame(gp = out_gp, tpre = out_tpre, stringsAsFactors = FALSE) +} diff --git a/R/edid-utils.R b/R/edid-utils.R new file mode 100644 index 0000000..0bf0210 --- /dev/null +++ b/R/edid-utils.R @@ -0,0 +1,43 @@ +# edid-utils.R +# Internal constants and small shared helpers for the EDiD estimator. +# Do NOT modify this file to change the estimator logic -- see the relevant +# edid-*.R file for each component. + +#' @keywords internal +EDID_COND_THRESH <- 1e12 # condition number above which pseudoinverse is used + +#' @keywords internal +EDID_DENOM_EPS <- 1e-12 # denominator threshold below which uniform weights are used + +#' @keywords internal +EDID_CLIP_LO <- 1 / 20 # ratio clipping lower bound (deferred: covariate path) + +#' @keywords internal +EDID_CLIP_HI <- 20 # ratio clipping upper bound (deferred: covariate path) + +#' @keywords internal +EDID_SE_EPS <- sqrt(.Machine$double.eps) * 10 # SE below which is treated as zero/NA + +# --------------------------------------------------------------------------- +# Small shared helpers +# --------------------------------------------------------------------------- + +#' Biased sample covariance (divide by n, not n-1) +#' +#' @param x numeric vector +#' @param y numeric vector, same length as x +#' @return scalar +#' @keywords internal +cov_nn_edid <- function(x, y) { + mean((x - mean(x)) * (y - mean(y))) +} + +#' Safe mean: returns NA on empty vector instead of NaN +#' +#' @param x numeric vector +#' @return scalar +#' @keywords internal +safe_mean_edid <- function(x) { + if (length(x) == 0L) return(NA_real_) + mean(x) +} diff --git a/R/edid-validate.R b/R/edid-validate.R new file mode 100644 index 0000000..b30a00a --- /dev/null +++ b/R/edid-validate.R @@ -0,0 +1,323 @@ +# edid-validate.R +# Input validation for edid(). All checks are performed before any computation. + +#' Validate inputs to \code{edid()} +#' +#' Performs all structural and type checks on user-supplied arguments. +#' Returns invisibly \code{TRUE} on success; stops with an informative message +#' on any failure. +#' +#' @param data data.frame or coercible +#' @param yname character scalar: outcome column name +#' @param idname character scalar: unit id column name +#' @param tname character scalar: time column name +#' @param gname character scalar: first-treatment-period column name +#' @param covariates character vector or NULL +#' @param pt_assumption character scalar, already matched via \code{match.arg} +#' @param alp numeric scalar in (0, 1) +#' @param clustervars character scalar or NULL +#' @param control_group character scalar, already matched via \code{match.arg} +#' @param biters non-negative integer (internal bootstrap iterations) +#' @param anticipation non-negative integer +#' @param survey_design always NULL (survey not yet implemented) +#' +#' @return invisibly TRUE +#' @keywords internal +validate_edid_inputs <- function( + data, yname, idname, tname, gname, xformla = NULL, covariates, + pt_assumption, alp, clustervars, control_group, + biters, anticipation, survey_design +) { + + # ------------------------------------------------------------------ + # 1. data is data.frame-like and has rows + # ------------------------------------------------------------------ + if (!is.data.frame(data) && !inherits(data, "data.table") && + !inherits(data, "tbl_df")) { + # try coercing + tryCatch( + data <- as.data.frame(data), + error = function(e) stop("`data` must be a data.frame or coercible to one.") + ) + } + if (nrow(data) == 0L) { + stop("`data` has no rows.") + } + + # ------------------------------------------------------------------ + # 2. yname / idname / tname / gname are character scalars naming + # existing columns + # ------------------------------------------------------------------ + .check_col <- function(arg, argname) { + if (!is.character(arg) || length(arg) != 1L) { + stop(sprintf("`%s` must be a character scalar (column name).", argname)) + } + if (!arg %in% names(data)) { + stop(sprintf("`%s` = \"%s\" is not a column in `data`.", argname, arg)) + } + } + .check_col(yname, "yname") + .check_col(idname, "idname") + .check_col(tname, "tname") + .check_col(gname, "gname") + + # Columns must be distinct + col_names <- c(yname, idname, tname, gname) + if (anyDuplicated(col_names)) { + stop("`yname`, `idname`, `tname`, and `gname` must name distinct columns.") + } + + # ------------------------------------------------------------------ + # 3. yname column is numeric; no NA; all finite + # ------------------------------------------------------------------ + y_col <- data[[yname]] + if (!is.numeric(y_col)) { + stop(sprintf("Column `%s` (yname) must be numeric.", yname)) + } + if (anyNA(y_col)) { + stop(sprintf("Column `%s` (yname) contains NA values. ", yname), + "edid() requires a complete, balanced panel with no missing outcomes.") + } + if (!all(is.finite(y_col))) { + stop(sprintf("Column `%s` (yname) contains non-finite values (Inf/-Inf/NaN). ", yname), + "edid() requires all outcomes to be finite.") + } + + # ------------------------------------------------------------------ + # 4. tname column is numeric; no NA + # ------------------------------------------------------------------ + t_col <- data[[tname]] + if (!is.numeric(t_col)) { + stop(sprintf("Column `%s` (tname) must be numeric.", tname)) + } + if (anyNA(t_col)) { + stop(sprintf("Column `%s` (tname) contains NA values.", tname)) + } + + # ------------------------------------------------------------------ + # 5. gname column is numeric; no NA + # ------------------------------------------------------------------ + ft_col <- data[[gname]] + if (!is.numeric(ft_col)) { + stop(sprintf("Column `%s` (gname) must be numeric.", gname)) + } + if (anyNA(ft_col)) { + stop(sprintf("Column `%s` (gname) contains NA values. ", gname), + "Use Inf to denote never-treated units.") + } + + # ------------------------------------------------------------------ + # 6. No duplicate (idname, tname) rows + # ------------------------------------------------------------------ + ut_key <- paste(data[[idname]], data[[tname]], sep = "___") + if (anyDuplicated(ut_key)) { + stop("Duplicate (idname, tname) pairs found in `data`. ", + "edid() requires a balanced panel with exactly one observation per unit-period.") + } + + # ------------------------------------------------------------------ + # 7. Panel is balanced: every unit appears in every time period + # ------------------------------------------------------------------ + all_units_v <- unique(data[[idname]]) + all_times_v <- unique(data[[tname]]) + n_units <- length(all_units_v) + n_times <- length(all_times_v) + expected_obs <- n_units * n_times + if (nrow(data) != expected_obs) { + stop(sprintf( + "Panel is unbalanced: expected %d rows (%d units x %d periods) but found %d. ", + expected_obs, n_units, n_times, nrow(data)), + "edid() requires a balanced panel.") + } + + # ------------------------------------------------------------------ + # 8. Treatment is absorbing: gname is constant within unit + # ------------------------------------------------------------------ + ft_by_unit <- tapply(data[[gname]], data[[idname]], function(x) length(unique(x))) + if (any(ft_by_unit > 1L)) { + bad <- names(ft_by_unit)[ft_by_unit > 1L] + stop(sprintf( + "`%s` (gname) is not constant within unit for %d unit(s) (e.g., %s). ", + gname, length(bad), bad[1]), + "Treatment must be absorbing.") + } + + # ------------------------------------------------------------------ + # 9-10. Control group availability + # ------------------------------------------------------------------ + # Get time-invariant gname per unit (one row per unit) + unit_ft <- tapply(data[[gname]], data[[idname]], `[`, 1L) + + if (control_group == "nevertreated") { + n_never <- sum(is.infinite(unit_ft)) + if (n_never == 0L) { + stop("No never-treated units found (`gname == Inf`). ", + "edid() requires never-treated units when `control_group = \"nevertreated\"`.") + } + } else { + # notyettreated + finite_ft <- unit_ft[is.finite(unit_ft)] + if (length(finite_ft) == 0L) { + stop("No finite first-treatment values found; cannot determine last cohort.") + } + last_g <- max(finite_ft) + n_last <- sum(finite_ft == last_g) + if (n_last == 0L) { + stop("No units in the last treated cohort found. ", + "Cannot use `control_group = \"notyettreated\"`.") + } + } + + # ------------------------------------------------------------------ + # 11. covariates is deprecated: error with redirect message + # ------------------------------------------------------------------ + if (!is.null(covariates)) { + stop( + "The 'covariates' argument has been replaced by 'xformla'. ", + "Pass a formula like xformla = ~ X1 + X2." + ) + } + + # ------------------------------------------------------------------ + # 11b. xformla validation (enhanced: NA, time-invariance, formula) + # ------------------------------------------------------------------ + if (!is.null(xformla)) { + if (!inherits(xformla, "formula")) { + stop("`xformla` must be a one-sided formula (e.g., ~ X1 + X2) or NULL.") + } + # Extract RHS variable names (skip ~1) + rhs_vars <- all.vars(xformla) + if (length(rhs_vars) > 0L) { + missing_vars <- setdiff(rhs_vars, names(data)) + if (length(missing_vars) > 0L) { + stop(sprintf( + "Variable(s) in `xformla` not found in `data`: %s", + paste(missing_vars, collapse = ", ") + )) + } + + # Check all covariate columns are numeric or factor + for (v in rhs_vars) { + if (!is.numeric(data[[v]]) && !is.factor(data[[v]])) { + stop(sprintf("Covariate column `%s` must be numeric or factor.", v)) + } + } + + # NA check: reject any NA in covariate columns + for (v in rhs_vars) { + if (anyNA(data[[v]])) { + stop(sprintf( + "Covariate column `%s` contains NA values. ", + v), + "edid() requires complete covariate data for all units and periods.") + } + } + + # Time-invariance check: covariates must be constant within unit + for (v in rhs_vars) { + n_unique_by_unit <- tapply(data[[v]], data[[idname]], + function(x) length(unique(x))) + if (any(n_unique_by_unit > 1L)) { + bad_units <- names(n_unique_by_unit)[n_unique_by_unit > 1L] + stop(sprintf( + "Covariate `%s` is time-varying for %d unit(s) (e.g., unit %s). ", + v, length(bad_units), bad_units[1]), + "edid() requires all covariates in `xformla` to be time-invariant (constant within unit).") + } + } + + # Validate that model.matrix() can expand the formula without error + # Build a one-row-per-unit test frame + test_idx <- match(unique(data[[idname]]), data[[idname]]) + test_df <- data[test_idx, , drop = FALSE] + tryCatch({ + mm <- stats::model.matrix(xformla, data = test_df) + # Remove intercept if present + if ("(Intercept)" %in% colnames(mm)) { + mm <- mm[, colnames(mm) != "(Intercept)", drop = FALSE] + } + if (ncol(mm) == 0L) { + stop("`xformla` expands to zero non-intercept columns. Use xformla = NULL for no covariates.") + } + if (anyNA(mm)) { + stop("`xformla` expansion via model.matrix() produces NA values. Check for unsupported formula features.") + } + if (any(!is.finite(mm))) { + stop("`xformla` expansion via model.matrix() produces non-finite values.") + } + }, error = function(e) { + stop(sprintf( + "Cannot expand `xformla` via model.matrix(): %s", + conditionMessage(e) + )) + }) + } + } + + # ------------------------------------------------------------------ + # 12. survey_design != NULL -> stop (stub) + # ------------------------------------------------------------------ + if (!is.null(survey_design)) { + stop("survey_design not yet implemented in edid(). ", + "Pass survey_design = NULL or omit the argument.") + } + + # ------------------------------------------------------------------ + # 13. alp in (0, 1) + # ------------------------------------------------------------------ + if (!is.numeric(alp) || length(alp) != 1L || alp <= 0 || alp >= 1) { + stop("`alp` must be a numeric scalar strictly between 0 and 1.") + } + + # ------------------------------------------------------------------ + # 14. biters >= 0 integer + # ------------------------------------------------------------------ + if (!is.numeric(biters) || length(biters) != 1L || + biters < 0 || biters != floor(biters)) { + stop("`biters` must be a non-negative integer.") + } + + # ------------------------------------------------------------------ + # 15. anticipation >= 0 integer; effective pre-treatment check + # ------------------------------------------------------------------ + if (!is.numeric(anticipation) || length(anticipation) != 1L || + anticipation < 0 || anticipation != floor(anticipation)) { + stop("`anticipation` must be a non-negative integer.") + } + min_ft <- min(unit_ft[is.finite(unit_ft)]) + min_t <- min(all_times_v) + if (min_ft - anticipation <= min_t) { + stop(sprintf( + "With `anticipation = %d`, the earliest treatment cohort (%g) would be treated at or before ", + anticipation, min_ft), + sprintf("the first observed period (%g). ", min_t), + "There must be at least one pre-treatment period available.") + } + + # ------------------------------------------------------------------ + # 16. clustervars column checks + # ------------------------------------------------------------------ + if (!is.null(clustervars)) { + if (!is.character(clustervars) || length(clustervars) != 1L) { + stop("`clustervars` must be a character scalar naming a column in `data`, or NULL.") + } + if (!clustervars %in% names(data)) { + stop(sprintf("`clustervars` = \"%s\" is not a column in `data`.", clustervars)) + } + cl_col <- data[[clustervars]] + if (anyNA(cl_col)) { + stop(sprintf("Cluster column `%s` contains NA values.", clustervars)) + } + # Time-invariant within unit + cl_by_unit <- tapply(cl_col, data[[idname]], function(x) length(unique(x))) + if (any(cl_by_unit > 1L)) { + bad <- names(cl_by_unit)[cl_by_unit > 1L] + stop(sprintf( + "Cluster variable `%s` is not time-invariant for %d unit(s) (e.g., %s). ", + clustervars, length(bad), bad[1]), + "Cluster variable must be constant within unit.") + } + } + + invisible(TRUE) +} diff --git a/R/edid.R b/R/edid.R new file mode 100644 index 0000000..9b4ccf3 --- /dev/null +++ b/R/edid.R @@ -0,0 +1,373 @@ +#' Efficient Difference-in-Differences Estimator +#' +#' Estimates group-time average treatment effects \eqn{ATT(g, t)} for staggered +#' adoption designs using the Efficient DiD (EDiD) estimator of Chen, Sant'Anna +#' & Xie (2025). The estimator combines all valid DiD identifying moments for +#' each \eqn{(g, t)} cell with optimal inverse-covariance weights to achieve +#' minimum asymptotic variance. +#' +#' @param data A \code{data.frame}, \code{data.table}, or tibble in long format +#' (one row per unit-time observation). +#' @param yname Character scalar: name of the outcome column (must be numeric +#' with no missing or non-finite values). +#' @param idname Character scalar: name of the unit identifier column. +#' @param tname Character scalar: name of the time period column (numeric). +#' @param gname Character scalar: name of the column recording each unit's +#' first treatment period. Never-treated units should have \code{Inf} or +#' \code{0} (the \code{att_gt()} convention). \code{0} is automatically +#' converted to \code{Inf} internally. +#' @param xformla A one-sided formula specifying covariates to condition on, +#' e.g., \code{~ X1 + X2}. Default \code{NULL} (equivalent to \code{~1}, +#' no covariates). When \code{NULL} or \code{~1}, the efficient no-covariate +#' path is used. \strong{Note}: The \code{covariates} argument is deprecated +#' and will error if non-NULL; use \code{xformla} instead. +#' @param covariates Character vector of covariate column names, or \code{NULL} +#' (default). \strong{Currently not implemented}: passing non-NULL triggers an +#' error. +#' @param pt_assumption Parallel-trends assumption regime. One of: +#' \describe{ +#' \item{\code{"all"}}{PT-All: parallel trends holds for all pre-treatment +#' periods (default). Uses all valid \eqn{(g', t_{pre})} pairs.} +#' \item{\code{"post"}}{PT-Post: parallel trends holds only for the period +#' immediately before treatment. Each cell uses a single DiD moment.} +#' } +#' @param alp Significance level for confidence intervals. Default \code{0.05}. +#' @param clustervars Character scalar naming a time-invariant cluster variable +#' in \code{data}, or \code{NULL} for no clustering (default). When supplied, +#' cluster-robust standard errors are computed via the sandwich EIF formula. +#' Note: edid() currently supports only a single cluster variable internally. +#' @param control_group Control group definition. One of: +#' \describe{ +#' \item{\code{"nevertreated"}}{Use never-treated units (default).} +#' \item{\code{"notyettreated"}}{Use the last-treated cohort as +#' pseudo-controls (relabeled as never-treated internally).} +#' } +#' @param bstrap Logical: whether to use multiplier bootstrap inference. +#' Default \code{FALSE} (analytical standard errors). When \code{TRUE}, +#' \code{biters} bootstrap draws are used. +#' @param biters Positive integer: number of multiplier bootstrap iterations. +#' Default \code{1000L}. Only used when \code{bstrap = TRUE}. +#' @param bootstrap_weights Distribution for multiplier weights. One of +#' \code{"rademacher"} (default), \code{"mammen"}, or \code{"webb"}. +#' @param seed Integer seed for reproducibility of bootstrap draws, or +#' \code{NULL} (default, no seed set). +#' @param anticipation Non-negative integer: number of anticipation periods. +#' Default \code{0L}. The effective treatment start for cohort \eqn{g} is +#' \eqn{g - \text{anticipation}}. +#' @param aggregate Which aggregations to compute. One or more of +#' \code{"all"} (default), \code{"overall"}, \code{"event_study"}, +#' \code{"group"}, or \code{"none"}. +#' @param balance_e Integer or \code{NULL}: if not \code{NULL}, restricts the +#' event-study aggregation to relative times in +#' \eqn{[-\text{balance\_e}, \text{balance\_e}]}. +#' @param survey_design Always \code{NULL}. Survey designs are not yet +#' implemented; passing a non-NULL value triggers an error. +#' @param store_eif Logical: if \code{TRUE}, store the full \eqn{n \times K} +#' EIF matrix in \code{edid_fit$eif}. Default \code{FALSE}. The EIF is +#' always computed internally when \code{bstrap = TRUE}. +#' +#' @return An object of class \code{edid_fit} (a list) with elements: +#' \describe{ +#' \item{\code{call}}{The matched call.} +#' \item{\code{att_gt}}{data.frame of cell-level estimates (group, time, +#' att, se, ci_lower, ci_upper, t_stat, p_value, is_pre).} +#' \item{\code{overall}}{List: overall ATT with SE and CI.} +#' \item{\code{event_study}}{List of per-relative-time ATTs.} +#' \item{\code{group}}{List of per-cohort ATTs.} +#' \item{\code{eif}}{EIF matrix or \code{NULL}.} +#' \item{\code{bootstrap}}{Bootstrap results or \code{NULL}.} +#' \item{\code{bstrap}}{Logical: whether bootstrap inference was used.} +#' } +#' +#' @references Chen, L., Sant'Anna, P. H. C., & Xie, Y. (2025). +#' \emph{Efficient Difference-in-Differences}. Working paper. +#' +#' @examples +#' # Simulate a simple balanced panel with staggered adoption +#' set.seed(42) +#' n_units <- 100 +#' n_periods <- 6 +#' unit_ids <- rep(1:n_units, each = n_periods) +#' time_ids <- rep(1:n_periods, times = n_units) +#' # Assign cohorts: 1/3 treated in period 3, 1/3 in period 5, 1/3 never +#' cohort_assign <- rep( +#' c(3, 5, Inf), +#' times = c(ceiling(n_units / 3), +#' ceiling(n_units / 3), +#' n_units - 2 * ceiling(n_units / 3)) +#' )[1:n_units] +#' first_treat_vec <- cohort_assign[unit_ids] +#' # Generate outcomes: ATT = 1 for treated post-treatment +#' treat_effect <- as.numeric(time_ids >= first_treat_vec) +#' y_vals <- 0.5 * time_ids + treat_effect + rnorm(n_units * n_periods, sd = 0.5) +#' panel_df <- data.frame( +#' id = unit_ids, +#' period = time_ids, +#' y = y_vals, +#' first_treat = first_treat_vec +#' ) +#' # Fit EDiD (no-covariate, PT-All, analytical SE) +#' fit <- edid( +#' data = panel_df, +#' yname = "y", +#' idname = "id", +#' tname = "period", +#' gname = "first_treat", +#' pt_assumption = "all" +#' ) +#' # View overall ATT +#' fit$overall$att +#' # Extract cell-level estimates +#' head(fit$att_gt) +#' +#' @export +edid <- function( + data, + yname, + idname, + tname, + gname, + xformla = NULL, + covariates = NULL, + pt_assumption = c("all", "post"), + alp = 0.05, + clustervars = NULL, + control_group = c("nevertreated", "notyettreated"), + bstrap = FALSE, + biters = 1000L, + bootstrap_weights = c("rademacher", "mammen", "webb"), + seed = NULL, + anticipation = 0L, + aggregate = c("all", "overall", "event_study", "group", "none"), + balance_e = NULL, + survey_design = NULL, + store_eif = FALSE +) { + mc <- match.call() + + # ------------------------------------------------------------------ + # Argument matching + # ------------------------------------------------------------------ + pt_assumption <- match.arg(pt_assumption) + control_group <- match.arg(control_group) + bootstrap_weights <- match.arg(bootstrap_weights) + aggregate <- match.arg(aggregate, several.ok = TRUE) + # When "all" is present it subsumes the others + if ("all" %in% aggregate) aggregate <- "all" + + anticipation <- as.integer(anticipation) + + # ------------------------------------------------------------------ + # Bootstrap: derive internal n_bootstrap from bstrap + biters + # ------------------------------------------------------------------ + n_bootstrap_internal <- if (bstrap) as.integer(biters) else 0L + + # ------------------------------------------------------------------ + # Accept G=0 (att_gt convention) or G=Inf (edid native) for never-treated + # Convert 0 -> Inf internally, matching att_gt's internal transformation + data <- as.data.frame(data) + zero_nt <- is.finite(data[[gname]]) & data[[gname]] == 0 + if (any(zero_nt)) { + data[[gname]] <- ifelse(zero_nt, Inf, data[[gname]]) + } + + # ------------------------------------------------------------------ + # Validation + # ------------------------------------------------------------------ + validate_edid_inputs( + data = data, + yname = yname, + idname = idname, + tname = tname, + gname = gname, + xformla = xformla, + covariates = covariates, + pt_assumption = pt_assumption, + alp = alp, + clustervars = clustervars, + control_group = control_group, + biters = n_bootstrap_internal, + anticipation = anticipation, + survey_design = survey_design + ) + + # ------------------------------------------------------------------ + # Panel preparation + # ------------------------------------------------------------------ + panel_obj <- prepare_edid_panel( + data = data, + yname = yname, + idname = idname, + tname = tname, + gname = gname, + xformla = xformla, + covariates = covariates, + clustervars = clustervars, + control_group = control_group, + anticipation = anticipation + ) + + # ------------------------------------------------------------------ + # Cell estimation + # EIF is always needed for aggregated SE computation, not just bootstrap. + # ------------------------------------------------------------------ + do_any_agg <- !("none" %in% aggregate) + need_eif_for_boot <- (n_bootstrap_internal > 0L) + # need_eif: TRUE whenever we need aggregated inference OR bootstrap + need_eif_internal <- do_any_agg || need_eif_for_boot + + fit_result <- fit_edid_cells( + panel_obj = panel_obj, + pt_assumption = pt_assumption, + alpha = alp, + store_eif = store_eif, + xformla = xformla, + need_eif = need_eif_internal, + seed = seed + ) + + cells <- fit_result$cells + eif_matrix <- fit_result$eif_matrix + cell_index <- fit_result$cell_index + + # ------------------------------------------------------------------ + # Convenience att_gt table + # ------------------------------------------------------------------ + att_gt_df <- data.frame( + group = vapply(cells, function(x) x$group, numeric(1L)), + time = vapply(cells, function(x) x$time, numeric(1L)), + att = vapply(cells, function(x) if (is.null(x$att)) NA_real_ else x$att, numeric(1L)), + se = vapply(cells, function(x) if (is.null(x$se)) NA_real_ else x$se, numeric(1L)), + ci_lower = vapply(cells, function(x) if (is.null(x$ci_lower)) NA_real_ else x$ci_lower, numeric(1L)), + ci_upper = vapply(cells, function(x) if (is.null(x$ci_upper)) NA_real_ else x$ci_upper, numeric(1L)), + t_stat = vapply(cells, function(x) if (is.null(x$t_stat)) NA_real_ else x$t_stat, numeric(1L)), + p_value = vapply(cells, function(x) if (is.null(x$p_value)) NA_real_ else x$p_value, numeric(1L)), + n_pairs = vapply(cells, function(x) if (is.null(x$n_pairs)) 0L else x$n_pairs, integer(1L)), + is_pre = vapply(cells, function(x) x$is_pre, logical(1L)), + stringsAsFactors = FALSE + ) + + # ------------------------------------------------------------------ + # Aggregation + # ------------------------------------------------------------------ + do_overall <- aggregate %in% c("all", "overall") + do_event_study <- aggregate %in% c("all", "event_study") + do_group <- aggregate %in% c("all", "group") + + overall_res <- NULL + event_study_res <- NULL + group_res <- NULL + + if (do_overall) { + overall_res <- aggregate_overall_edid(cells, eif_matrix, cell_index, panel_obj, alp) + } + if (do_event_study) { + event_study_res <- aggregate_event_study_edid( + cells, eif_matrix, cell_index, panel_obj, alp, balance_e + ) + } + if (do_group) { + group_res <- aggregate_group_edid(cells, eif_matrix, cell_index, panel_obj, alp) + } + + # ------------------------------------------------------------------ + # Bootstrap + # ------------------------------------------------------------------ + bootstrap_res <- NULL + if (n_bootstrap_internal > 0L) { + if (is.null(eif_matrix)) { + warning("EIF matrix is NULL; bootstrap cannot be run. ", + "This should not happen --- please report this issue.") + } else { + boot_agg <- if ("all" %in% aggregate || identical(aggregate, "all")) "all" else + paste(intersect(aggregate, c("overall", "event_study", "group")), collapse = ",") + bootstrap_res <- run_multiplier_bootstrap_edid( + cells = cells, + eif_matrix = eif_matrix, + cell_index = cell_index, + panel_obj = panel_obj, + n_bootstrap = n_bootstrap_internal, + bootstrap_weights = bootstrap_weights, + seed = seed, + aggregate = "all", + balance_e = balance_e, + alpha = alp + ) + class(bootstrap_res) <- c("edid_bootstrap", "list") + + # Overwrite SEs/CIs with bootstrap versions + if (do_overall && !is.null(overall_res) && !is.null(bootstrap_res$overall_b)) { + bs_ov <- compute_bootstrap_stats_edid(bootstrap_res$overall_b, overall_res$att, alp) + overall_res$se <- bs_ov$se_boot + overall_res$ci_lower <- bs_ov$ci_lower + overall_res$ci_upper <- bs_ov$ci_upper + overall_res$p_value <- bs_ov$p_value_boot + overall_res$t_stat <- if (!is.na(bs_ov$se_boot) && bs_ov$se_boot > 0) { + overall_res$att / bs_ov$se_boot + } else NA_real_ + } + + if (do_event_study && !is.null(event_study_res) && + !is.null(bootstrap_res$event_study_b)) { + for (e_nm in names(event_study_res)) { + draws <- bootstrap_res$event_study_b[[e_nm]] + if (is.null(draws)) next + bs_es <- compute_bootstrap_stats_edid(draws, event_study_res[[e_nm]]$att, alp) + event_study_res[[e_nm]]$se <- bs_es$se_boot + event_study_res[[e_nm]]$ci_lower <- bs_es$ci_lower + event_study_res[[e_nm]]$ci_upper <- bs_es$ci_upper + event_study_res[[e_nm]]$p_value <- bs_es$p_value_boot + } + } + + if (do_group && !is.null(group_res) && !is.null(bootstrap_res$group_b)) { + for (g_nm in names(group_res)) { + draws <- bootstrap_res$group_b[[g_nm]] + if (is.null(draws)) next + bs_gr <- compute_bootstrap_stats_edid(draws, group_res[[g_nm]]$att, alp) + group_res[[g_nm]]$se <- bs_gr$se_boot + group_res[[g_nm]]$ci_lower <- bs_gr$ci_lower + group_res[[g_nm]]$ci_upper <- bs_gr$ci_upper + group_res[[g_nm]]$p_value <- bs_gr$p_value_boot + } + # Also update cell-level SEs from bootstrap if we have per-cell draws + # (not stored at cell level -- only aggregate-level bootstrap is implemented) + } + } + } + + # ------------------------------------------------------------------ + # EIF matrix storage (only if user requested it) + # ------------------------------------------------------------------ + eif_export <- if (store_eif) eif_matrix else NULL + + # ------------------------------------------------------------------ + # Construct edid_fit S3 object + # ------------------------------------------------------------------ + edid_fit <- list( + call = mc, + pt_assumption = pt_assumption, + control_group = control_group, + alpha = alp, + n = panel_obj$n, + T_periods = panel_obj$T_periods, + treatment_groups = panel_obj$treatment_groups, + anticipation = panel_obj$anticipation, + inference_type = if (n_bootstrap_internal > 0L) "bootstrap" else "analytical", + clustervars = clustervars, + xformla = xformla, + bstrap = bstrap, + cells = cells, + att_gt = att_gt_df, + overall = overall_res, + event_study = event_study_res, + group = group_res, + eif = eif_export, + bootstrap = bootstrap_res, + n_bootstrap = n_bootstrap_internal, + bootstrap_weights = bootstrap_weights + ) + + class(edid_fit) <- c("edid_fit", "list") + edid_fit +} diff --git a/benchmark/compare_att_gt_edid.R b/benchmark/compare_att_gt_edid.R new file mode 100644 index 0000000..b92fcc7 --- /dev/null +++ b/benchmark/compare_att_gt_edid.R @@ -0,0 +1,392 @@ +############################################################################### +# compare_att_gt_edid.R +# +# Simulate a staggered adoption panel and compare estimates from: +# (1) att_gt() -- Callaway & Sant'Anna (2021) +# (2) edid() -- Chen, Sant'Anna & Xie (2025), efficient DiD +# +# True ATT = 1 in all post-treatment periods (homogeneous, no dynamics). +############################################################################### + +devtools::load_all(quiet = TRUE) # loads att_gt(), edid(), and all helpers + +set.seed(2025) + +# ── 1. Simulate balanced staggered panel ────────────────────────────────────── +sp <- reset.sim(time.periods = 6, n = 2000) +df <- build_sim_dataset(sp_list = sp, panel = TRUE) + +# build_sim_dataset returns G = 0 for never-treated (att_gt convention). +# edid() now accepts G=0 directly and converts 0 -> Inf internally, +# so we no longer need a separate G_edid column. + +cat("─── Dataset summary ────────────────────────────────────────────────────\n") +cat("Periods:", sort(unique(df$period)), "\n") +cat("Cohorts (att_gt coding, 0 = never):", sort(unique(df$G)), "\n") +cat("Units:", length(unique(df$id)), " | Obs:", nrow(df), "\n\n") + + +# ── 2. Fit att_gt() (Callaway-Sant'Anna, no covariates) ─────────────────────── +cs <- att_gt( + yname = "Y", + tname = "period", + idname = "id", + gname = "G", + data = df, + control_group = "nevertreated", + est_method = "reg", # outcome-regression, closest to no-cov DiD + bstrap = FALSE, + cband = FALSE, + print_details = FALSE +) + +cs_overall <- aggte(cs, type = "simple") +cs_eventstudy <- aggte(cs, type = "dynamic") +cs_group <- aggte(cs, type = "group") + + +# ── 3. Fit edid() (Chen-Sant'Anna-Xie, no covariates, PT-All) ───────────────── +# Note: gname = "G" directly -- edid() auto-converts G=0 to Inf internally. +ed <- edid( + data = df, + yname = "Y", + idname = "id", + tname = "period", + gname = "G", + pt_assumption = "all", + control_group = "nevertreated", + alp = 0.05 +) + + +# ── 4. Compare cell-level ATT(g, t) ────────────────────────────────────────── +cat("─── Cell-level ATT(g,t) comparison ─────────────────────────────────────\n") + +# CS cell-level estimates +cs_gt <- data.frame( + group = cs$group, + time = cs$t, + att_cs = round(cs$att, 4), + se_cs = round(cs$se, 4), + is_pre = cs$t < cs$group +) + +# EDiD cell-level estimates (post-treatment only, drop skipped cells) +ed_gt <- ed$att_gt[!is.na(ed$att_gt$att), c("group", "time", "att", "se", "is_pre")] +names(ed_gt)[3:4] <- c("att_ed", "se_ed") +ed_gt$att_ed <- round(ed_gt$att_ed, 4) +ed_gt$se_ed <- round(ed_gt$se_ed, 4) + +# Merge on (group, time) +cmp_gt <- merge(cs_gt, ed_gt, by = c("group", "time", "is_pre"), all = TRUE) +cmp_gt <- cmp_gt[order(cmp_gt$is_pre, cmp_gt$group, cmp_gt$time), ] + +# Highlight post-treatment cells +post_gt <- subset(cmp_gt, !is_pre) +pre_gt <- subset(cmp_gt, is_pre) + +cat("\nPost-treatment cells (ATT should \u2248 1):\n") +print(post_gt, row.names = FALSE) + +cat("\nPre-treatment placebo cells (ATT should \u2248 0):\n") +print(pre_gt, row.names = FALSE) + +cat("\nMean ATT diff (post): CS - EDiD =", + round(mean(post_gt$att_cs - post_gt$att_ed, na.rm = TRUE), 4), "\n") + + +# ── 5. Compare overall ATT ──────────────────────────────────────────────────── +cat("\n─── Overall ATT ─────────────────────────────────────────────────────────\n") +cat(sprintf(" %-20s ATT = %6.4f SE = %6.4f 95%% CI = [%6.4f, %6.4f]\n", + "att_gt (CS-simple):", + cs_overall$overall.att, + cs_overall$overall.se, + cs_overall$overall.att - qnorm(0.975) * cs_overall$overall.se, + cs_overall$overall.att + qnorm(0.975) * cs_overall$overall.se)) +cat(sprintf(" %-20s ATT = %6.4f SE = %6.4f 95%% CI = [%6.4f, %6.4f]\n", + "edid (PT-Post):", + ed$overall$att, + ed$overall$se, + ed$overall$ci_lower, + ed$overall$ci_upper)) +cat(" True ATT = 1\n") + + +# ── 6. Compare event-study ──────────────────────────────────────────────────── +cat("\n─── Event-study (relative-time ATTs) ───────────────────────────────────\n") + +# CS event-study +cs_es <- data.frame( + rel_time = cs_eventstudy$egt, + att_cs = round(cs_eventstudy$att.egt, 4), + se_cs = round(cs_eventstudy$se.egt, 4) +) + +# EDiD event-study +ed_es_names <- names(ed$event_study) +ed_es <- data.frame( + rel_time = as.numeric(sub("^e", "", ed_es_names)), + att_ed = round(vapply(ed$event_study, `[[`, numeric(1), "att"), 4), + se_ed = round(vapply(ed$event_study, `[[`, numeric(1), "se"), 4) +) + +cmp_es <- merge(cs_es, ed_es, by = "rel_time", all = TRUE) +cmp_es <- cmp_es[order(cmp_es$rel_time), ] +print(cmp_es, row.names = FALSE) + + +# ── 7. Compare group-level ATTs ─────────────────────────────────────────────── +cat("\n─── Group-level ATTs ────────────────────────────────────────────────────\n") + +cs_grp <- data.frame( + group = cs_group$egt, + att_cs = round(cs_group$att.egt, 4), + se_cs = round(cs_group$se.egt, 4) +) + +ed_grp_names <- names(ed$group) +ed_grp <- data.frame( + group = as.numeric(sub("^g", "", ed_grp_names)), + att_ed = round(vapply(ed$group, `[[`, numeric(1), "att"), 4), + se_ed = round(vapply(ed$group, `[[`, numeric(1), "se"), 4) +) + +cmp_grp <- merge(cs_grp, ed_grp, by = "group", all = TRUE) +cmp_grp <- cmp_grp[order(cmp_grp$group), ] +print(cmp_grp, row.names = FALSE) + + +# ── 8. SE comparison: who is more efficient? ───────────────────────────────── +cat("\n─── Efficiency comparison (post-treatment cells only) ──────────────────\n") +post_with_both <- subset(post_gt, !is.na(att_cs) & !is.na(att_ed)) +post_with_both$se_ratio <- round(post_with_both$se_cs / post_with_both$se_ed, 3) +cat("SE ratio = se(CS) / se(EDiD). Ratio > 1 means EDiD is more efficient.\n") +print(post_with_both[, c("group", "time", "se_cs", "se_ed", "se_ratio")], + row.names = FALSE) +cat(sprintf("\nMedian SE ratio (CS / EDiD): %.3f\n", + median(post_with_both$se_ratio, na.rm = TRUE))) + + +# ── 9. Quick plot: event-study side by side ─────────────────────────────────── +if (requireNamespace("ggplot2", quietly = TRUE)) { + library(ggplot2) + + es_plot <- rbind( + transform(cmp_es[!is.na(cmp_es$att_cs), ], + att = att_cs, + se = se_cs, + Estimator = "att_gt (CS)")[, c("rel_time", "att", "se", "Estimator")], + transform(cmp_es[!is.na(cmp_es$att_ed), ], + att = att_ed, + se = se_ed, + Estimator = "edid (CSX)" )[, c("rel_time", "att", "se", "Estimator")] + ) + es_plot$ci_lo <- es_plot$att - qnorm(0.975) * es_plot$se + es_plot$ci_hi <- es_plot$att + qnorm(0.975) * es_plot$se + + # Dodge slightly so CIs don't overlap + es_plot$rel_time_jit <- ifelse(es_plot$Estimator == "att_gt (CS)", + es_plot$rel_time - 0.1, + es_plot$rel_time + 0.1) + + p <- ggplot(es_plot, aes(x = rel_time_jit, y = att, + color = Estimator, fill = Estimator)) + + geom_hline(yintercept = 0, linetype = "dashed", color = "grey60") + + geom_hline(yintercept = 1, linetype = "dotted", color = "black", alpha = 0.5) + + geom_ribbon(aes(ymin = ci_lo, ymax = ci_hi), alpha = 0.15, color = NA) + + geom_line(aes(x = rel_time_jit)) + + geom_point(size = 2) + + geom_errorbar(aes(ymin = ci_lo, ymax = ci_hi), width = 0.15) + + scale_x_continuous(breaks = sort(unique(es_plot$rel_time)), + labels = sort(unique(es_plot$rel_time))) + + labs( + title = "Event-study: att_gt (CS) vs edid (CSX)", + subtitle = "Dotted line = true ATT = 1", + x = "Relative time (periods since first treatment)", + y = "ATT estimate", + color = NULL, fill = NULL + ) + + theme_bw(base_size = 13) + + theme(legend.position = "bottom") + + print(p) + cat("\nEvent-study plot printed.\n") +} + +cat("\n─── Done ────────────────────────────────────────────────────────────────\n") + + +############################################################################### +# Part II: R edid() vs Python diff-diff on the canonical diff-diff dataset +# +# Dataset: generate_staggered_data(n_units=300, n_periods=10, ATT=2.0, seed=42) +# Cohorts (first_treat): 0=never-treated, 3, 5, 7 +# Periods: 0–9 (0-indexed; period 0 = period_1, excluded from DiD pairs) +# True ATT = 2.0 +# +# Python benchmark CSVs are generated by: +# python3 benchmark/generate_diffdiff_benchmark.py +# +# If the CSVs are missing this section is skipped with a warning. +############################################################################### + +rm(list = ls()) + +cat("\n\n") +cat("═══════════════════════════════════════════════════════════════════════\n") +cat(" PART II: R edid() vs Python diff-diff (canonical diff-diff dataset) \n") +cat("═══════════════════════════════════════════════════════════════════════\n\n") + +# ── Locate benchmark CSVs ───────────────────────────────────────────────────── +bench_dir <- file.path(getwd(), "benchmark", "data") +panel_path <- file.path(bench_dir, "diffdiff_panel.csv") + +if (!file.exists(panel_path)) { + warning( + "Benchmark CSVs not found at '", bench_dir, "'.\n", + "Run: python3 benchmark/generate_diffdiff_benchmark.py\n", + "Then re-run this script to see the diff-diff comparison." + ) +} else { + + # ── 10. Load data and fit R edid() ───────────────────────────────────────── + cat("─── 10. Load diff-diff panel and fit R edid() ─────────────────────────\n") + dd_panel <- read.csv(panel_path) + + cat("Dataset summary:\n") + cat(" Periods :", sort(unique(dd_panel$period)), "\n") + cat(" Cohorts :", sort(unique(dd_panel$first_treat)), + " (0 = never-treated)\n") + cat(" Units :", length(unique(dd_panel$unit)), + " | Obs:", nrow(dd_panel), "\n\n") + + # first_treat == 0 => never-treated; edid() auto-converts 0 -> Inf + ed_dd <- edid( + data = dd_panel, + yname = "outcome", + idname = "unit", + tname = "period", + gname = "first_treat", + pt_assumption = "all", + control_group = "nevertreated", + alp = 0.05 + ) + + # ── 11. Load Python results ──────────────────────────────────────────────── + py_gt <- read.csv(file.path(bench_dir, "py_edid_all_gt.csv")) + py_ov <- read.csv(file.path(bench_dir, "py_edid_all_overall.csv")) + py_es <- read.csv(file.path(bench_dir, "py_edid_all_es.csv")) + py_grp <- read.csv(file.path(bench_dir, "py_edid_all_grp.csv")) + + # ── 12. Cell-level ATT(g,t) comparison ───────────────────────────────────── + cat("─── 11. Cell-level ATT(g,t): R edid() vs Python diff-diff ────────────\n") + + # Python GT columns: group, time, effect, se, t_stat, p_value, conf_int_lower, conf_int_upper + py_gt_clean <- data.frame( + group = py_gt$group, + time = py_gt$time, + att_py = round(py_gt$effect, 4), + se_py = round(py_gt$se, 4) + ) + + # R edid GT: group, time, att, se, is_pre + r_gt_clean <- ed_dd$att_gt[!is.na(ed_dd$att_gt$att), + c("group", "time", "att", "se", "is_pre")] + names(r_gt_clean)[3:4] <- c("att_r", "se_r") + r_gt_clean$att_r <- round(r_gt_clean$att_r, 4) + r_gt_clean$se_r <- round(r_gt_clean$se_r, 4) + + cmp_dd <- merge(py_gt_clean, r_gt_clean, by = c("group", "time"), all = TRUE) + cmp_dd <- cmp_dd[order(cmp_dd$is_pre, cmp_dd$group, cmp_dd$time), ] + + # Absolute difference + cmp_dd$diff_att <- round(abs(cmp_dd$att_py - cmp_dd$att_r), 5) + cmp_dd$diff_se <- round(abs(cmp_dd$se_py - cmp_dd$se_r), 5) + + cat("\nPost-treatment cells (ATT should \u2248 2.0):\n") + print(subset(cmp_dd, !is_pre)[, + c("group", "time", "att_py", "att_r", "diff_att", + "se_py", "se_r", "diff_se")], + row.names = FALSE) + + cat("\nPre-treatment placebo cells (ATT should \u2248 0):\n") + print(subset(cmp_dd, is_pre)[, + c("group", "time", "att_py", "att_r", "diff_att", + "se_py", "se_r", "diff_se")], + row.names = FALSE) + + post_dd <- subset(cmp_dd, !is_pre) + cat(sprintf("\nMax |diff| in ATT (post): %.5f\n", + max(post_dd$diff_att, na.rm = TRUE))) + cat(sprintf("Max |diff| in SE (post): %.5f\n", + max(post_dd$diff_se, na.rm = TRUE))) + + # ── 13. Overall ATT comparison ───────────────────────────────────────────── + cat("\n─── 12. Overall ATT comparison ─────────────────────────────────────────\n") + cat(sprintf(" %-25s ATT = %7.4f SE = %7.4f 95%% CI = [%7.4f, %7.4f]\n", + "Python diff-diff (EDiD):", + py_ov$att, py_ov$se, py_ov$ci_lower, py_ov$ci_upper)) + cat(sprintf(" %-25s ATT = %7.4f SE = %7.4f 95%% CI = [%7.4f, %7.4f]\n", + "R edid() (PT-All):", + ed_dd$overall$att, + ed_dd$overall$se, + ed_dd$overall$ci_lower, + ed_dd$overall$ci_upper)) + cat(" True ATT = 2.0\n") + cat(sprintf(" |diff| ATT : %.5f\n", + abs(py_ov$att - ed_dd$overall$att))) + cat(sprintf(" |diff| SE : %.5f\n", + abs(py_ov$se - ed_dd$overall$se))) + + # ── 14. Event-study comparison ───────────────────────────────────────────── + cat("\n─── 13. Event-study comparison ─────────────────────────────────────────\n") + + # Python event study columns: relative_period, effect, se, ... + py_es_df <- data.frame( + rel_time = py_es$relative_period, + att_py = round(py_es$effect, 4), + se_py = round(py_es$se, 4) + ) + + # R event study: named list, each element has $e, $att, $se + r_es_df <- do.call(rbind, lapply(ed_dd$event_study, function(x) { + data.frame(rel_time = x$e, att_r = round(x$att, 4), se_r = round(x$se, 4)) + })) + r_es_df <- r_es_df[order(r_es_df$rel_time), ] + + cmp_es_dd <- merge(py_es_df, r_es_df, by = "rel_time", all = TRUE) + cmp_es_dd <- cmp_es_dd[order(cmp_es_dd$rel_time), ] + cmp_es_dd$diff_att <- round(abs(cmp_es_dd$att_py - cmp_es_dd$att_r), 5) + cmp_es_dd$diff_se <- round(abs(cmp_es_dd$se_py - cmp_es_dd$se_r), 5) + + print(cmp_es_dd, row.names = FALSE) + cat(sprintf("\nMax |diff| in event-study ATT: %.5f\n", + max(cmp_es_dd$diff_att, na.rm = TRUE))) + cat(sprintf("Max |diff| in event-study SE : %.5f\n", + max(cmp_es_dd$diff_se, na.rm = TRUE))) + + # ── 15. Group-level comparison ────────────────────────────────────────────── + cat("\n─── 14. Group-level ATT comparison ─────────────────────────────────────\n") + + py_grp_df <- data.frame( + group = py_grp$group, + att_py = round(py_grp$effect, 4), + se_py = round(py_grp$se, 4) + ) + + r_grp_df <- do.call(rbind, lapply(ed_dd$group, function(x) { + data.frame(group = x$group, att_r = round(x$att, 4), se_r = round(x$se, 4)) + })) + + cmp_grp_dd <- merge(py_grp_df, r_grp_df, by = "group", all = TRUE) + cmp_grp_dd$diff_att <- round(abs(cmp_grp_dd$att_py - cmp_grp_dd$att_r), 5) + cmp_grp_dd$diff_se <- round(abs(cmp_grp_dd$se_py - cmp_grp_dd$se_r), 5) + print(cmp_grp_dd, row.names = FALSE) + + cat("\n─── Summary ─────────────────────────────────────────────────────────────\n") + cat("Differences < 1e-4 indicate numerical equivalence.\n") + cat("Differences > 0.01 on estimates or SE warrant investigation.\n") + +} # end if (file.exists(panel_path)) + +cat("\n─── Done (Part II) ──────────────────────────────────────────────────────\n") diff --git a/benchmark/compare_author_vs_edid.R b/benchmark/compare_author_vs_edid.R new file mode 100644 index 0000000..5c71863 --- /dev/null +++ b/benchmark/compare_author_vs_edid.R @@ -0,0 +1,333 @@ +############################################################################### +# benchmark/compare_author_vs_edid.R +# +# Validates R edid() against the author's reference implementation +# (efficient_did_unc_stagg from Chen, Sant'Anna & Xie 2025, shared code). +# +# Dataset: diffdiff benchmark panel (n=300, T=10, ATT=2, cohorts 0/3/5/7) +# generated by: python3 benchmark/generate_diffdiff_benchmark.py +# +# Usage (from project root): +# Rscript benchmark/compare_author_vs_edid.R +# or in RStudio: source("benchmark/compare_author_vs_edid.R") +############################################################################### + +rm(list = ls()) + +suppressPackageStartupMessages({ + devtools::load_all(quiet = TRUE) + library(dplyr) + library(BMisc) # required by es_efficient() for TorF() / getListElement() +}) + +source("benchmark/edid_sim_original.R") # loads efficient_did_unc_stagg() + +panel_path <- file.path("benchmark", "data", "diffdiff_panel.csv") +if (!file.exists(panel_path)) { + stop("Benchmark panel not found.\n", + "Run: python3 benchmark/generate_diffdiff_benchmark.py") +} +panel <- read.csv(panel_path) + +cat("═══════════════════════════════════════════════════════════════════════\n") +cat(" Author (CSX reference code) vs R edid() — staggered, no covariates \n") +cat("═══════════════════════════════════════════════════════════════════════\n\n") +cat("Data: n =", length(unique(panel$unit)), + " | T =", length(unique(panel$period)), + " | periods:", paste(sort(unique(panel$period)), collapse = " "), + " | cohorts:", paste(sort(unique(panel$first_treat)), collapse = " "), + "(0 = never-treated)\n") +cat("True ATT = 2.0\n\n") + + +# ── A. Author's efficient_did_unc_stagg ────────────────────────────────────── +# +# Key design choices in the author's code: +# 1. Comparison cohorts (g') iterate over TREATED cohorts only [3, 5, 7]. +# Never-treated (G=0) is NOT used as a comparison cohort (gp). +# 2. The never-treated (G=0) appears ONLY as the direct control: +# E[Y_inf(t) - Y_inf(t')] +# 3. Valid (g', t') pairs: +# g'==g -> t_min <= t' < g' (includes period_1 = t_min) +# g'!=g -> t_min < t' < g' (excludes period_1) +# 4. Omega is estimated as sample cov of the per-pair influence functions. + +cat("─── A. Author's efficient_did_unc_stagg ────────────────────────────────\n") + +res_author <- efficient_did_unc_stagg( + data = panel, + yname = "outcome", + tname = "period", + gname = "first_treat", + idname = "unit", + return_weights = TRUE +) + +wt_author <- as.data.frame(res_author$weights) +# Count valid pairs per (g,t) cell +n_pairs_author <- wt_author |> + group_by(group, time) |> + summarise(n_pairs_author = n(), .groups = "drop") |> + filter(time >= group) # post-treatment only + +post_mask_auth <- res_author$time >= res_author$group +auth_gt <- data.frame( + group = res_author$group[post_mask_auth], + time = res_author$time[post_mask_auth], + att_a = round(res_author$estimate[post_mask_auth], 4), + se_a = round(res_author$std.error[post_mask_auth], 4) +) + +cat("\nPost-treatment ATT(g,t) -- author:\n") +print(auth_gt, row.names = FALSE) + + +# ── B. R edid() (PT-All, nevertreated) ─────────────────────────────────────── +# +# Key design choices in our R edid(): +# 1. Comparison cohorts (gp) include BOTH treated cohorts [3,5,7] AND +# the never-treated cohort (gp=Inf). +# 2. Valid (gp, tpre) pairs (for finite gp): +# tpre < eff_start(gp) AND tpre != period_1 (period_1 = 0 here) +# For gp=Inf: +# tpre can be any period except period_1 +# 3. Omega is computed analytically from closed-form within-group covariances. +# 4. The EIF is computed per unit and uses (after yesterday's fix) the full +# baseline-shift term when gp=Inf. + +cat("\n─── B. R edid() PT-All nevertreated ────────────────────────────────────\n") + +# first_treat=0 -> edid() auto-converts to Inf (never-treated) +res_r <- edid( + data = panel, + yname = "outcome", + idname = "unit", + tname = "period", + gname = "first_treat", + pt_assumption = "all", + control_group = "nevertreated", + alp = 0.05 +) + +r_gt_all <- res_r$att_gt[!is.na(res_r$att_gt$att), ] +r_gt <- r_gt_all[!r_gt_all$is_pre, c("group", "time", "att", "se")] +names(r_gt)[3:4] <- c("att_r", "se_r") +r_gt$att_r <- round(r_gt$att_r, 4) +r_gt$se_r <- round(r_gt$se_r, 4) + +# n_pairs per cell from the cell objects +n_pairs_r <- vapply(res_r$cells, function(x) x$n_pairs %||% 0L, integer(1L)) +cell_idx <- data.frame( + group = vapply(res_r$cells, `[[`, numeric(1), "group"), + time = vapply(res_r$cells, `[[`, numeric(1), "time"), + is_pre = vapply(res_r$cells, `[[`, logical(1), "is_pre"), + n_pairs = n_pairs_r +) +n_pairs_r_post <- cell_idx[!cell_idx$is_pre, c("group", "time", "n_pairs")] +names(n_pairs_r_post)[3] <- "n_pairs_r" + +cat("\nPost-treatment ATT(g,t) -- R edid():\n") +print(r_gt, row.names = FALSE) + + +# ── C. Side-by-side comparison ─────────────────────────────────────────────── +cat("\n─── C. Side-by-side: author vs R edid() ────────────────────────────────\n") + +cmp <- merge(auth_gt, r_gt, by = c("group", "time")) +cmp <- merge(cmp, n_pairs_author, by = c("group", "time"), all.x = TRUE) +cmp <- merge(cmp, n_pairs_r_post, by = c("group", "time"), all.x = TRUE) +cmp <- cmp[order(cmp$group, cmp$time), ] + +cmp$diff_att <- round(cmp$att_r - cmp$att_a, 4) +cmp$diff_se <- round(cmp$se_r - cmp$se_a, 4) + +print(cmp[, c("group", "time", + "n_pairs_author", "n_pairs_r", + "att_a", "att_r", "diff_att", + "se_a", "se_r", "diff_se")], + row.names = FALSE) + +cat(sprintf("\nMax |diff| ATT : %.5f\n", max(abs(cmp$diff_att), na.rm = TRUE))) +cat(sprintf("Max |diff| SE : %.5f\n", max(abs(cmp$diff_se), na.rm = TRUE))) +cat(sprintf("Median SE ratio (author/edid): %.3f (>1 => edid is more efficient)\n", + median(cmp$se_a / cmp$se_r, na.rm = TRUE))) + + +# ── D. Structural differences summary ─────────────────────────────────────── +cat("\n─── D. Structural differences (moments / valid pairs) ──────────────────\n") + +# Enumerate author's pairs and our pairs for one representative cell: (g=3, t=3) +g_ex <- 3; t_ex <- 3 +auth_pairs_ex <- wt_author[wt_author$group == g_ex & wt_author$time == t_ex, + c("g_prime", "t_prime")] +names(auth_pairs_ex) <- c("gp", "tpre") +auth_pairs_ex <- auth_pairs_ex[order(auth_pairs_ex$gp, auth_pairs_ex$tpre), ] + +# Our pairs: from the panel object +# re-derive manually for transparency +periods <- sort(unique(panel$period)) +period_1 <- min(periods) +cohorts <- sort(unique(panel$first_treat[panel$first_treat > 0])) +r_pairs_ex <- do.call(rbind, lapply(c(cohorts, Inf), function(gp) { + if (is.finite(gp)) { + valid_tpre <- periods[periods < gp & periods != period_1] + } else { + valid_tpre <- periods[periods != period_1] + } + if (length(valid_tpre) == 0) return(NULL) + data.frame(gp = gp, tpre = valid_tpre) +})) +r_pairs_ex <- r_pairs_ex[order(r_pairs_ex$gp, r_pairs_ex$tpre), ] + +cat(sprintf("\nFor cell (g=%d, t=%d):\n", g_ex, t_ex)) +cat(sprintf(" Author: %d pairs [g' ∈ {3,5,7} only, never-treated NOT a comparison cohort]\n", + nrow(auth_pairs_ex))) +cat(sprintf(" edid(): %d pairs [gp ∈ {3,5,7,Inf}, period_1=%d excluded]\n", + nrow(r_pairs_ex), period_1)) + +cat("\n Author's pairs (g_prime, t_prime):\n") +print(auth_pairs_ex, row.names = FALSE) + +cat("\n edid() pairs (gp, tpre) [Inf = never-treated]:\n") +r_pairs_print <- r_pairs_ex +r_pairs_print$gp <- ifelse(is.infinite(r_pairs_print$gp), "Inf", as.character(r_pairs_print$gp)) +print(r_pairs_print, row.names = FALSE) + +cat("\n Pairs in author but NOT in edid(): (g'=g, t'=period_1) pairs\n") +in_auth_not_r <- author_only <- auth_pairs_ex[ + auth_pairs_ex$tpre == period_1 & auth_pairs_ex$gp == g_ex, ] +if (nrow(in_auth_not_r) == 0) { + cat(" none\n") +} else { + print(in_auth_not_r, row.names = FALSE) + cat(" Note: when g'=g and t'=period_1, IF_g_prime = 0, so this pair\n") + cat(" contributes zero new information (reduces to standard CS DiD).\n") +} + +cat("\n Pairs in edid() but NOT in author: gp=Inf pairs\n") +inf_only <- r_pairs_ex[is.infinite(r_pairs_ex$gp), ] +inf_only$gp <- "Inf" +print(inf_only, row.names = FALSE) +cat(" Note: gp=Inf pairs exploit the never-treated cohort as\n") +cat(" comparison cohort, providing additional identifying moments.\n") + + +# ── E. Overall ATT comparison ──────────────────────────────────────────────── +cat("\n─── E. Overall ATT ─────────────────────────────────────────────────────\n") + +# Author: compute aggregated overall = cohort-share weighted average of post cells +glist_auth <- sort(unique(panel$first_treat[panel$first_treat > 0])) +n_auth <- length(unique(panel$unit)) +pi_g <- sapply(glist_auth, function(g) mean(panel$first_treat == g)) +names(pi_g) <- as.character(glist_auth) + +auth_att_overall <- sum(vapply(glist_auth, function(g) { + idx <- which(auth_gt$group == g) + if (length(idx) == 0) return(0) + pi_g[as.character(g)] * mean(auth_gt$att_a[idx]) +}, numeric(1))) / sum(pi_g) + +cat(sprintf(" Author overall ATT (manual avg): %.4f\n", auth_att_overall)) +cat(sprintf(" R edid() overall ATT : %.4f SE = %.4f CI = [%.4f, %.4f]\n", + res_r$overall$att, res_r$overall$se, + res_r$overall$ci_lower, res_r$overall$ci_upper)) +cat(" True ATT = 2.0\n") + + +# ── F. Omega: sample-cov (author) vs analytical (edid) ───────────────────── +cat("\n─── F. Omega* comparison: sample cov (author) vs analytical (edid) ──────\n") +cat("Check whether the two Omega matrices agree for cell (g=3, t=3).\n\n") + +# Author's Omega for (g=3, t=3) +auth_if <- as.matrix(res_author$eff_inf_function) # n x n_gt +# find column index for (g=3, t=3) +g3t3_col_auth <- which( + res_author$group == g_ex & res_author$time == t_ex +) +# Get the raw (pre-combination) IFs: stored in att_gt_results which we don't +# have directly. Re-run with lower-level access. +# Recompute inside the function scope by running directly: +cat("(Re-running IF computation for (g=3,t=3) to extract raw Omega...)\n") + +df_tmp <- as.data.frame(panel) +Y_tmp <- df_tmp$outcome +Time_tmp <- df_tmp$period +G_tmp <- df_tmp$first_treat +sample_size_tmp <- length(unique(df_tmp$unit)) +G_cs_tmp <- G_tmp[Time_tmp == min(Time_tmp)] +tlist_tmp <- sort(unique(df_tmp$period)) +glist_tmp <- sort(unique(df_tmp$first_treat)) +g_treated_tmp <- glist_tmp[glist_tmp > 0] +pi2_tmp <- sapply(glist_tmp, function(gv) mean(G_tmp == gv)) + +# The pairs for (g=3, tt=3) as in author's code +pairs_3_3 <- auth_pairs_ex # already extracted above + +# Compute per-pair IFs +IF_mat_auth <- matrix(0, nrow = sample_size_tmp, ncol = nrow(pairs_3_3)) +for (kk in seq_len(nrow(pairs_3_3))) { + gp_v <- pairs_3_3$gp[kk] + tp_v <- pairs_3_3$tpre[kk] + mean_g_t <- mean(Y_tmp[G_tmp==g_ex & Time_tmp==t_ex]) - + mean(Y_tmp[G_tmp==g_ex & Time_tmp==min(Time_tmp)]) + mean_inf_tpre <- mean(Y_tmp[G_tmp==0 & Time_tmp==t_ex]) - + mean(Y_tmp[G_tmp==0 & Time_tmp==tp_v]) + mean_gp_tpre <- mean(Y_tmp[G_tmp==gp_v & Time_tmp==tp_v]) - + mean(Y_tmp[G_tmp==gp_v & Time_tmp==min(Time_tmp)]) + IF_g <- (G_cs_tmp==g_ex)/pi2_tmp[which(glist_tmp==g_ex)] * + (Y_tmp[Time_tmp==t_ex] - Y_tmp[Time_tmp==min(Time_tmp)] - mean_g_t) + IF_inf <- (G_cs_tmp==0)/pi2_tmp[which(glist_tmp==0)] * + (Y_tmp[Time_tmp==t_ex] - Y_tmp[Time_tmp==tp_v] - mean_inf_tpre) + IF_gp <- (G_cs_tmp==gp_v)/pi2_tmp[which(glist_tmp==gp_v)] * + (Y_tmp[Time_tmp==tp_v] - Y_tmp[Time_tmp==min(Time_tmp)] - mean_gp_tpre) + IF_mat_auth[, kk] <- IF_g - (IF_inf + IF_gp) +} + +Omega_auth_sample <- cov(IF_mat_auth) # 13 x 13 + +# Our edid() Omega for (g=3, t=3): re-derive from panel_obj +panel_obj_r <- did:::prepare_edid_panel( + data = panel, + yname = "outcome", idname = "unit", tname = "period", gname = "first_treat", + control_group = "nevertreated" +) +# Convert G=0 -> Inf first (as edid() does) +panel2 <- panel +panel2$first_treat[panel2$first_treat == 0] <- Inf +panel_obj_r2 <- did:::prepare_edid_panel( + data = panel2, + yname = "outcome", idname = "unit", tname = "period", gname = "first_treat", + control_group = "nevertreated" +) +pairs_r_3_3 <- did:::enumerate_valid_pairs_edid( + target_g = g_ex, + treatment_groups = panel_obj_r2$treatment_groups, + time_periods = panel_obj_r2$time_periods, + period_1 = panel_obj_r2$period_1, + pt_assumption = "all" +) +Omega_r_analytical <- did:::compute_omega_star_nocov_edid( + target_g = g_ex, + target_t = t_ex, + pairs = pairs_r_3_3, + panel_obj = panel_obj_r2, + pt_assumption = "all" +) + +cat(sprintf(" Author Omega (sample cov): %d x %d matrix\n", + nrow(Omega_auth_sample), ncol(Omega_auth_sample))) +cat(sprintf(" edid() Omega (analytical): %d x %d matrix\n", + nrow(Omega_r_analytical), ncol(Omega_r_analytical))) + +# Compare condition numbers +kappa_auth <- max(svd(Omega_auth_sample, nu=0, nv=0)$d) / + min(svd(Omega_auth_sample, nu=0, nv=0)$d[ + svd(Omega_auth_sample, nu=0, nv=0)$d > 0]) +kappa_r <- max(svd(Omega_r_analytical, nu=0, nv=0)$d) / + min(svd(Omega_r_analytical, nu=0, nv=0)$d[ + svd(Omega_r_analytical, nu=0, nv=0)$d > 0]) +cat(sprintf("\n Condition number (author Omega): %.2e\n", kappa_auth)) +cat(sprintf(" Condition number (edid Omega) : %.2e\n", kappa_r)) + + +cat("─── Done ────────────────────────────────────────────────────────────────\n") diff --git a/benchmark/data/diffdiff_panel.csv b/benchmark/data/diffdiff_panel.csv new file mode 100644 index 0000000..2169440 --- /dev/null +++ b/benchmark/data/diffdiff_panel.csv @@ -0,0 +1,3001 @@ +unit,period,outcome,first_treat +0,0,11.278160550821887,0 +0,1,11.83561476480023,0 +0,2,11.542111740976484,0 +0,3,11.716260053270108,0 +0,4,12.289790657946211,0 +0,5,10.978500951407096,0 +0,6,11.426794972896793,0 +0,7,11.433938438212763,0 +0,8,11.108223077010022,0 +0,9,12.035898916856585,0 +1,0,8.507481589844117,0 +1,1,8.090322606788959,0 +1,2,8.431659680951173,0 +1,3,8.977373388335643,0 +1,4,9.196607706114092,0 +1,5,8.598195672617177,0 +1,6,9.409557584764945,0 +1,7,8.8788279691973,0 +1,8,8.514065528066515,0 +1,9,8.735564463880879,0 +2,0,8.848856627982231,0 +2,1,9.245057957397913,0 +2,2,9.610116540227525,0 +2,3,10.290917393954041,0 +2,4,10.849509799847732,0 +2,5,9.398034126576219,0 +2,6,10.3855386183947,0 +2,7,9.768852914685972,0 +2,8,10.626473428496341,0 +2,9,10.423581558920876,0 +3,0,7.184502977255072,0 +3,1,8.664104385963078,0 +3,2,7.99745553377447,0 +3,3,8.13300577099206,0 +3,4,7.965079684533992,0 +3,5,8.27281425183506,0 +3,6,8.913901110526295,0 +3,7,8.705333719710634,0 +3,8,9.064286891320078,0 +3,9,9.18091681714326,0 +4,0,9.982264675962957,0 +4,1,9.250540772766684,0 +4,2,8.783504939776007,0 +4,3,10.155545056484867,0 +4,4,9.556189762150247,0 +4,5,10.379230070487562,0 +4,6,10.113622404790108,0 +4,7,9.956365083097202,0 +4,8,10.296321795222022,0 +4,9,11.197440128262024,0 +5,0,12.719106539441666,0 +5,1,11.81530684248118,0 +5,2,11.960711571573327,0 +5,3,12.518736353647984,0 +5,4,11.657785758531118,0 +5,5,12.347151461223918,0 +5,6,12.267684850937862,0 +5,7,12.537570380643567,0 +5,8,12.063931871940088,0 +5,9,11.785832528260306,0 +6,0,5.508867435619657,0 +6,1,6.086667097120499,0 +6,2,6.516021511145636,0 +6,3,6.698763360290922,0 +6,4,7.913974734830062,0 +6,5,7.598355838568952,0 +6,6,6.664313595444489,0 +6,7,7.419213379840782,0 +6,8,7.141820035440141,0 +6,9,7.303177234595256,0 +7,0,10.961510111799406,0 +7,1,11.278432845579411,0 +7,2,10.899218045149706,0 +7,3,11.700773053458894,0 +7,4,10.697878174020513,0 +7,5,11.372016818272849,0 +7,6,12.76768415038963,0 +7,7,11.68038715840435,0 +7,8,12.385454540394802,0 +7,9,11.81460737617832,0 +8,0,10.765859752329455,0 +8,1,10.54707960746821,0 +8,2,10.590267414082453,0 +8,3,10.385730006301847,0 +8,4,11.090621884143333,0 +8,5,10.549702610071673,0 +8,6,11.40826382282127,0 +8,7,11.71811470688751,0 +8,8,11.458736908283171,0 +8,9,11.23234683788671,0 +9,0,9.03868287826076,0 +9,1,8.757363560833204,0 +9,2,9.479473651338985,0 +9,3,8.19599704649885,0 +9,4,9.043896404547088,0 +9,5,8.316294091044412,0 +9,6,8.664169673492808,0 +9,7,10.193631203513366,0 +9,8,10.059292579592254,0 +9,9,9.351959971964016,0 +10,0,6.356632563202642,0 +10,1,5.725619872302264,0 +10,2,7.036136537256773,0 +10,3,8.618091797346791,0 +10,4,7.725326426954915,0 +10,5,7.328098148198346,0 +10,6,7.940424395973243,0 +10,7,7.02740511472587,0 +10,8,7.759222609874272,0 +10,9,8.05762302773096,0 +11,0,10.101208689517215,0 +11,1,10.63966207627278,0 +11,2,10.516581628545765,0 +11,3,10.778422024481738,0 +11,4,10.20007287431236,0 +11,5,11.093166719633013,0 +11,6,11.558727489239734,0 +11,7,10.359184255602033,0 +11,8,10.50041118757046,0 +11,9,11.712151183587856,0 +12,0,8.845342588524865,0 +12,1,9.742925277905673,0 +12,2,8.919746725926302,0 +12,3,9.968537370007752,0 +12,4,9.40675749027512,0 +12,5,9.570128993533743,0 +12,6,10.323373592707346,0 +12,7,9.460129343150308,0 +12,8,9.270453533909917,0 +12,9,9.616732477858218,0 +13,0,10.691519398030977,0 +13,1,9.78247288661915,0 +13,2,10.984087874034968,0 +13,3,10.495966763900537,0 +13,4,11.43925875307619,0 +13,5,9.768222270259173,0 +13,6,10.672069535157323,0 +13,7,10.322118347092356,0 +13,8,10.852237689527433,0 +13,9,11.48918537326388,0 +14,0,9.954090978322009,0 +14,1,10.017015506572879,0 +14,2,10.164111855356856,0 +14,3,10.445398411356859,0 +14,4,9.939436270075333,0 +14,5,10.897129111496412,0 +14,6,10.975037276239627,0 +14,7,10.936223259874936,0 +14,8,11.121971012422518,0 +14,9,11.091913291451217,0 +15,0,14.221094434007364,0 +15,1,13.260010697115575,0 +15,2,13.250016173474542,0 +15,3,13.126793992463142,0 +15,4,13.087426447352913,0 +15,5,13.081321888841293,0 +15,6,13.359159126026372,0 +15,7,13.868217591815794,0 +15,8,14.170705346890689,0 +15,9,14.129128811917912,0 +16,0,9.138521106339095,0 +16,1,10.071381027063932,0 +16,2,10.090995081204424,0 +16,3,9.741464591342943,0 +16,4,9.594830672721596,0 +16,5,10.29550270546885,0 +16,6,10.215275538797004,0 +16,7,9.497225115346426,0 +16,8,10.287298617061749,0 +16,9,10.552306651091142,0 +17,0,7.503157621512393,0 +17,1,8.147926711174991,0 +17,2,7.425593772127375,0 +17,3,8.921098064762356,0 +17,4,8.976980007285986,0 +17,5,8.326746347604788,0 +17,6,8.734732183675806,0 +17,7,7.448044031856332,0 +17,8,8.174831184623605,0 +17,9,8.706115554680206,0 +18,0,9.822484759199204,0 +18,1,10.815749511227962,0 +18,2,11.55719953528663,0 +18,3,10.070243910197982,0 +18,4,10.339819567870038,0 +18,5,10.976275454067599,0 +18,6,11.764109344162444,0 +18,7,10.447364113642681,0 +18,8,11.283069331066105,0 +18,9,12.169200695319187,0 +19,0,9.614113793853898,0 +19,1,9.899458764520615,0 +19,2,10.428190064713112,0 +19,3,10.479699161515827,0 +19,4,11.246294011820353,0 +19,5,11.06082322785395,0 +19,6,10.152512338122467,0 +19,7,11.397698569393937,0 +19,8,10.951223924265706,0 +19,9,11.977216976414152,0 +20,0,12.404581382262414,0 +20,1,12.500067508929757,0 +20,2,13.188940955883139,0 +20,3,13.399838391649343,0 +20,4,13.342424832448977,0 +20,5,12.37557649178847,0 +20,6,13.587392370444999,0 +20,7,12.901221124643675,0 +20,8,13.636013206275006,0 +20,9,12.906507932781345,0 +21,0,11.89338356560069,0 +21,1,11.366923039675472,0 +21,2,11.228905166698896,0 +21,3,12.327132560070359,0 +21,4,12.191044752667533,0 +21,5,11.863234091132709,0 +21,6,12.99581191633969,0 +21,7,12.149896281044287,0 +21,8,12.486276328814625,0 +21,9,12.704679215668458,0 +22,0,10.403909151234712,0 +22,1,11.049310264998983,0 +22,2,10.647015944716655,0 +22,3,10.807922957190915,0 +22,4,11.795063438200689,0 +22,5,10.693449092042801,0 +22,6,10.107351952834273,0 +22,7,12.219210615804965,0 +22,8,12.788406094602522,0 +22,9,11.411107485574354,0 +23,0,11.958186762129031,0 +23,1,12.871363794341152,0 +23,2,12.983494307518788,0 +23,3,13.131643861927996,0 +23,4,12.7699117614782,0 +23,5,13.716386353677363,0 +23,6,13.788859470082166,0 +23,7,12.87940297252947,0 +23,8,14.076204148286568,0 +23,9,14.85294827341207,0 +24,0,7.708454057572828,0 +24,1,7.553811310320045,0 +24,2,7.7514722841505375,0 +24,3,8.230102274838579,0 +24,4,7.157138088600689,0 +24,5,8.20466924284295,0 +24,6,8.027241916622893,0 +24,7,9.246386397832321,0 +24,8,8.33538752445182,0 +24,9,9.356417696914058,0 +25,0,8.1686265856095,0 +25,1,9.1141265179019,0 +25,2,9.080197066396119,0 +25,3,8.585973310568114,0 +25,4,9.209194991818975,0 +25,5,9.826756352916304,0 +25,6,9.158601082357203,0 +25,7,8.574515609915622,0 +25,8,9.511715521261783,0 +25,9,9.169285387104248,0 +26,0,7.975677647857009,0 +26,1,8.20605423234639,0 +26,2,7.494022009181191,0 +26,3,7.639018947528612,0 +26,4,8.787881534951714,0 +26,5,8.38548876910411,0 +26,6,7.464475946278666,0 +26,7,9.239270135493017,0 +26,8,9.083032994826072,0 +26,9,8.689910705170709,0 +27,0,8.561965288159342,0 +27,1,9.738284341845631,0 +27,2,9.595055704968678,0 +27,3,10.7116815350305,0 +27,4,9.830474693050933,0 +27,5,9.914231964319471,0 +27,6,9.736916428634181,0 +27,7,10.32876832728886,0 +27,8,10.332922994312542,0 +27,9,10.746242897971062,0 +28,0,6.98596624556874,0 +28,1,7.128923968415401,0 +28,2,7.207076119431393,0 +28,3,7.9420285654768765,0 +28,4,8.395814925035749,0 +28,5,7.517207458312732,0 +28,6,7.634240840990836,0 +28,7,8.103666316822283,0 +28,8,7.923746954880932,0 +28,9,8.622654531457329,0 +29,0,10.144413248319285,0 +29,1,10.95694937017036,0 +29,2,11.079093726648084,0 +29,3,10.410123887435736,0 +29,4,11.188482671444406,0 +29,5,11.312723835705846,0 +29,6,11.769749566369393,0 +29,7,12.526784839458939,0 +29,8,11.947771568033458,0 +29,9,11.654895033493231,0 +30,0,9.527077335195688,0 +30,1,10.180141416523126,0 +30,2,9.267575197745,0 +30,3,9.400267163841729,0 +30,4,10.234829085439564,0 +30,5,9.944786547937053,0 +30,6,10.47929695115719,0 +30,7,10.248731209904804,0 +30,8,10.706386429847237,0 +30,9,9.938015548782133,0 +31,0,7.0523450856005505,0 +31,1,7.053042403813105,0 +31,2,6.650441948942619,0 +31,3,6.5766484370603395,0 +31,4,7.801261928632016,0 +31,5,7.382911219885695,0 +31,6,7.14724727017102,0 +31,7,7.7102979425330185,0 +31,8,8.422396839669336,0 +31,9,6.818018488308724,0 +32,0,7.220522492633253,0 +32,1,7.6073986166372825,0 +32,2,8.89943127094508,0 +32,3,8.410135329044566,0 +32,4,8.75250045781553,0 +32,5,7.898761378021801,0 +32,6,8.009073899949653,0 +32,7,8.89274869594627,0 +32,8,8.79797900305264,0 +32,9,9.14321124701698,0 +33,0,10.533192199436165,0 +33,1,10.866099558127704,0 +33,2,10.906087237352946,0 +33,3,11.315911389442855,0 +33,4,11.430531835872626,0 +33,5,10.317091697704507,0 +33,6,10.103393417229963,0 +33,7,11.827900393726145,0 +33,8,12.02089024153136,0 +33,9,11.016716185143723,0 +34,0,10.746335432515949,0 +34,1,11.825770546292363,0 +34,2,12.341672248964027,0 +34,3,12.875050397005921,0 +34,4,12.334401921839488,0 +34,5,11.990394787495916,0 +34,6,11.82968782499633,0 +34,7,12.381978781624088,0 +34,8,12.326620783846135,0 +34,9,12.068719182924355,0 +35,0,15.017839608815216,0 +35,1,14.986045984419638,0 +35,2,14.761486125161287,0 +35,3,13.83303658447901,0 +35,4,14.820971445532212,0 +35,5,14.813274988132523,0 +35,6,14.8147345954856,0 +35,7,15.318294416905461,0 +35,8,15.111147354814213,0 +35,9,15.263468901535742,0 +36,0,16.146178073700764,0 +36,1,16.09812063601571,0 +36,2,15.135919367157568,0 +36,3,16.169535470719936,0 +36,4,15.949628952590096,0 +36,5,15.687804453194651,0 +36,6,17.26863324166507,0 +36,7,17.39222260245751,0 +36,8,17.307335232162465,0 +36,9,16.855331640442596,0 +37,0,11.504131427028321,0 +37,1,10.934845456852905,0 +37,2,11.130217505000882,0 +37,3,10.582083181321002,0 +37,4,11.427314518801413,0 +37,5,11.359011830764201,0 +37,6,10.77749280813476,0 +37,7,11.503220310643695,0 +37,8,11.58895408462359,0 +37,9,12.627599449874314,0 +38,0,8.468030424192976,0 +38,1,8.126646479482005,0 +38,2,8.345317414836813,0 +38,3,8.343029948988372,0 +38,4,8.319466767538499,0 +38,5,7.979710161242129,0 +38,6,8.545397821132807,0 +38,7,8.347874631610015,0 +38,8,8.1957659904747,0 +38,9,9.17653466721004,0 +39,0,5.931539768503999,0 +39,1,4.942553682853342,0 +39,2,5.8745651197898185,0 +39,3,6.533758131487343,0 +39,4,6.665519307985543,0 +39,5,6.748825859980001,0 +39,6,6.3553640419881114,0 +39,7,6.013384774552806,0 +39,8,5.994058024858351,0 +39,9,6.80820989091256,0 +40,0,10.725062941870252,0 +40,1,11.279088480490204,0 +40,2,11.285412749858711,0 +40,3,10.769308596422086,0 +40,4,10.313319985257237,0 +40,5,10.875869626182222,0 +40,6,11.244038522092932,0 +40,7,11.134381301992969,0 +40,8,11.046475870648845,0 +40,9,11.561865332172621,0 +41,0,8.122138938674043,0 +41,1,8.16008476751666,0 +41,2,8.729844226531066,0 +41,3,8.473121519314747,0 +41,4,8.896170003868525,0 +41,5,9.01071982080541,0 +41,6,8.40440330883356,0 +41,7,8.83349724744664,0 +41,8,9.89300627933758,0 +41,9,8.693078053012204,0 +42,0,8.110958255155364,0 +42,1,8.338362903019373,0 +42,2,9.383840452928963,0 +42,3,9.48474411865698,0 +42,4,9.51048005192807,0 +42,5,10.27638039410488,0 +42,6,8.43286826308916,0 +42,7,10.067252416856041,0 +42,8,10.750004580542416,0 +42,9,9.505395097194773,0 +43,0,8.585903145985437,0 +43,1,8.499360443266074,0 +43,2,8.528633378841619,0 +43,3,8.912675399368865,0 +43,4,9.889549043722637,0 +43,5,10.194499739624144,0 +43,6,9.20783678936031,0 +43,7,10.428336508912619,0 +43,8,9.593594640372697,0 +43,9,10.552651915230925,0 +44,0,9.671768902340483,0 +44,1,9.883947140190301,0 +44,2,10.101155463488645,0 +44,3,11.607845066850997,0 +44,4,10.544061014589623,0 +44,5,9.864785962191498,0 +44,6,10.802917162305157,0 +44,7,10.237501958625057,0 +44,8,10.273543135716626,0 +44,9,11.072718907113284,0 +45,0,12.147503255460864,0 +45,1,12.37124841883119,0 +45,2,12.338953072507934,0 +45,3,12.600252024878007,0 +45,4,12.744443410293362,0 +45,5,11.66347699522258,0 +45,6,13.065246795880054,0 +45,7,12.340950751660136,0 +45,8,12.210810893795582,0 +45,9,13.002754839645947,0 +46,0,10.356087636547917,0 +46,1,10.067344052604549,0 +46,2,10.929613063724675,0 +46,3,9.943009551705236,0 +46,4,10.51062526050441,0 +46,5,10.52166006742419,0 +46,6,10.890803499721018,0 +46,7,11.153529277214307,0 +46,8,10.610150660078917,0 +46,9,11.576238473618803,0 +47,0,9.714231510089974,0 +47,1,8.836757199266668,0 +47,2,8.903434605724108,0 +47,3,9.976570679457968,0 +47,4,9.972246667029447,0 +47,5,10.131020269135307,0 +47,6,10.268739097657514,0 +47,7,10.495510245677712,0 +47,8,10.956555130846773,0 +47,9,10.027177875463845,0 +48,0,7.34270669789798,0 +48,1,7.482030565404686,0 +48,2,8.27315082793273,0 +48,3,8.851166183671602,0 +48,4,8.113022775228622,0 +48,5,7.17784257226855,0 +48,6,7.676696063043189,0 +48,7,8.21217413295037,0 +48,8,8.44982805060077,0 +48,9,8.624498180721014,0 +49,0,6.669924672081744,0 +49,1,6.594760945180061,0 +49,2,7.37530391586299,0 +49,3,6.612625457750601,0 +49,4,6.619475130853514,0 +49,5,7.390071879851517,0 +49,6,6.482813075628226,0 +49,7,7.545479963251287,0 +49,8,7.501910702454419,0 +49,9,7.476912865632202,0 +50,0,9.82152223542797,0 +50,1,8.81628045920598,0 +50,2,10.257533267115377,0 +50,3,9.21466845723929,0 +50,4,8.788875959368225,0 +50,5,9.562343972350751,0 +50,6,9.089261581693954,0 +50,7,9.351506257946589,0 +50,8,10.025900831861085,0 +50,9,10.205175226178401,0 +51,0,9.581351048236229,0 +51,1,10.48613747088656,0 +51,2,10.671188777373924,0 +51,3,10.910585776645005,0 +51,4,10.55714160192622,0 +51,5,11.074149254406208,0 +51,6,9.55203568437633,0 +51,7,10.433481624049126,0 +51,8,10.258932270201646,0 +51,9,10.852047828927056,0 +52,0,13.250135217561397,0 +52,1,13.552780906184898,0 +52,2,14.67694726726179,0 +52,3,13.750999928949577,0 +52,4,14.142755985754873,0 +52,5,13.919725162197647,0 +52,6,14.173716471138182,0 +52,7,14.2388681673778,0 +52,8,14.560021860146346,0 +52,9,15.018513594933355,0 +53,0,11.084246041747438,0 +53,1,10.515359083900355,0 +53,2,10.755322485963573,0 +53,3,9.98511678835299,0 +53,4,10.616610671737963,0 +53,5,11.230693775387566,0 +53,6,11.29353336215435,0 +53,7,11.066353909343297,0 +53,8,11.50374602272975,0 +53,9,11.405932397442681,0 +54,0,12.565632151175775,0 +54,1,12.21015860052467,0 +54,2,11.987629864260596,0 +54,3,12.433399650512627,0 +54,4,10.900181834119728,0 +54,5,12.656921890238639,0 +54,6,10.741272609438722,0 +54,7,11.803747318343405,0 +54,8,12.991363322516607,0 +54,9,13.104243690679512,0 +55,0,8.42019441285173,0 +55,1,8.745357781982225,0 +55,2,9.88667929722876,0 +55,3,9.059393737135357,0 +55,4,10.522868959123187,0 +55,5,9.500448876333522,0 +55,6,9.805426891362329,0 +55,7,10.509846002170999,0 +55,8,9.866922361018405,0 +55,9,9.400236812576296,0 +56,0,7.575248741318806,0 +56,1,7.712307099070905,0 +56,2,7.147741641065168,0 +56,3,7.802196431495539,0 +56,4,7.659016420167233,0 +56,5,8.592291345759667,0 +56,6,8.247418404258578,0 +56,7,8.188714592592165,0 +56,8,8.377021501854664,0 +56,9,8.641673476369034,0 +57,0,8.378173575380147,0 +57,1,7.669910361200566,0 +57,2,7.74897265922462,0 +57,3,8.922106083236523,0 +57,4,8.263598039432893,0 +57,5,7.86134542923682,0 +57,6,8.89167283022681,0 +57,7,9.0014513785516,0 +57,8,8.104408846906905,0 +57,9,9.08450733326706,0 +58,0,8.917339417142681,0 +58,1,8.836741096781626,0 +58,2,9.065538612809192,0 +58,3,8.147413321883121,0 +58,4,9.115067948426592,0 +58,5,8.898237991193271,0 +58,6,8.908152791566172,0 +58,7,9.684825675638356,0 +58,8,10.08918512626561,0 +58,9,10.346732898228675,0 +59,0,14.914343400623281,0 +59,1,14.302072374055236,0 +59,2,14.6332995463835,0 +59,3,14.94035090076245,0 +59,4,14.71752843952006,0 +59,5,14.8223215588261,0 +59,6,15.268816030808418,0 +59,7,14.927298141857362,0 +59,8,14.692295996683498,0 +59,9,14.9497029319524,0 +60,0,8.674181830418098,0 +60,1,8.458723287151205,0 +60,2,8.727331629985395,0 +60,3,8.992266262652322,0 +60,4,8.569805910102422,0 +60,5,9.23535072336613,0 +60,6,9.146647943280424,0 +60,7,8.439820098095487,0 +60,8,9.87837903714435,0 +60,9,9.00685426967277,0 +61,0,10.849430735880098,0 +61,1,11.254456552679539,0 +61,2,11.366455827133528,0 +61,3,12.003065066801458,0 +61,4,11.940053154699573,0 +61,5,12.008562423652394,0 +61,6,12.586829390213929,0 +61,7,12.546916088069041,0 +61,8,12.635002342794834,0 +61,9,12.781892636137037,0 +62,0,8.502212977827067,0 +62,1,7.2401689729372265,0 +62,2,8.211926517693717,0 +62,3,7.404040611581212,0 +62,4,8.612175607224845,0 +62,5,8.691829157713775,0 +62,6,9.316911781220787,0 +62,7,9.487962840186508,0 +62,8,9.095533167952132,0 +62,9,8.843965105127499,0 +63,0,12.105726582021358,0 +63,1,11.69918720708633,0 +63,2,12.062449700026812,0 +63,3,12.656214184228872,0 +63,4,11.984260352753777,0 +63,5,12.765985345369739,0 +63,6,12.801846710094638,0 +63,7,12.085750117183077,0 +63,8,13.15009308985719,0 +63,9,13.11242919843424,0 +64,0,10.820864875898055,0 +64,1,10.4887402031979,0 +64,2,10.54029892292394,0 +64,3,10.801070643760214,0 +64,4,11.441199163857705,0 +64,5,10.792089348511027,0 +64,6,11.58865805652166,0 +64,7,10.849024167772528,0 +64,8,11.467867639992301,0 +64,9,11.72472580949873,0 +65,0,10.90928927721328,0 +65,1,9.098066376923637,0 +65,2,10.622731487928887,0 +65,3,10.06164541167746,0 +65,4,10.292310163691802,0 +65,5,10.245898161377923,0 +65,6,10.509087613781436,0 +65,7,10.30988165250281,0 +65,8,11.21374929541255,0 +65,9,10.35848863257146,0 +66,0,10.484592233818756,0 +66,1,9.69629138762262,0 +66,2,10.088349024528489,0 +66,3,9.682493671584023,0 +66,4,10.545986807902892,0 +66,5,11.1410379261924,0 +66,6,10.479796857818705,0 +66,7,10.520032098377177,0 +66,8,10.16116646619517,0 +66,9,10.70382862095298,0 +67,0,7.894027479280456,0 +67,1,8.333985615781806,0 +67,2,9.003817632195787,0 +67,3,9.649931208011918,0 +67,4,10.49503003975597,0 +67,5,8.897132066605769,0 +67,6,10.008074687463939,0 +67,7,9.512300849342042,0 +67,8,9.414800785768032,0 +67,9,9.806721835004296,0 +68,0,10.92310264480414,0 +68,1,11.04734226577027,0 +68,2,10.887977821550827,0 +68,3,10.493089573064736,0 +68,4,10.52033180170312,0 +68,5,11.718766844604776,0 +68,6,11.353794357921304,0 +68,7,11.294099953136078,0 +68,8,11.696397216325739,0 +68,9,12.189610529421977,0 +69,0,9.18021501083164,0 +69,1,8.862005570997956,0 +69,2,9.903179517142915,0 +69,3,10.179625790809256,0 +69,4,9.737311366979867,0 +69,5,10.076864794990485,0 +69,6,10.311013037725345,0 +69,7,10.355082697520697,0 +69,8,10.197082627874753,0 +69,9,10.289206562855881,0 +70,0,7.808618759138755,0 +70,1,7.100119635191981,0 +70,2,8.099150050895297,0 +70,3,7.170881486902226,0 +70,4,7.551488744902976,0 +70,5,8.700571006369547,0 +70,6,8.568898553812602,0 +70,7,8.992487617308434,0 +70,8,8.212951280621859,0 +70,9,7.87266405182712,0 +71,0,7.323906252825591,0 +71,1,7.595140499981048,0 +71,2,7.683619209530767,0 +71,3,8.310965090739112,0 +71,4,7.66348963885904,0 +71,5,8.120142087262224,0 +71,6,7.549811276643458,0 +71,7,8.369283490382667,0 +71,8,8.245679126453581,0 +71,9,7.969218182218503,0 +72,0,10.227261154356812,0 +72,1,10.353087962186628,0 +72,2,10.410098270775691,0 +72,3,11.530846456578082,0 +72,4,10.6959528342903,0 +72,5,10.72322778035557,0 +72,6,9.896450356639168,0 +72,7,10.5980721594545,0 +72,8,11.013639123216064,0 +72,9,10.902249497355918,0 +73,0,13.849036309509769,0 +73,1,13.17571362512276,0 +73,2,14.002404637479996,0 +73,3,13.489034829846055,0 +73,4,13.576796431031658,0 +73,5,13.61378428896462,0 +73,6,13.7600806386316,0 +73,7,14.717580014345799,0 +73,8,12.798377149445262,0 +73,9,13.05740964060153,0 +74,0,10.04839781161882,0 +74,1,10.42724716811566,0 +74,2,10.865035439528882,0 +74,3,10.856526826160614,0 +74,4,10.527860270706592,0 +74,5,11.329505073767757,0 +74,6,11.435080928723242,0 +74,7,11.112012142782659,0 +74,8,11.601312013390297,0 +74,9,11.35630585886294,0 +75,0,9.481981834388792,0 +75,1,10.211631767280236,0 +75,2,10.018029909069572,0 +75,3,10.063393271157478,0 +75,4,10.899200133875022,0 +75,5,9.03728728351019,0 +75,6,9.653881262700448,0 +75,7,9.869188134593049,0 +75,8,10.381093023995833,0 +75,9,10.53541926289722,0 +76,0,9.817989455318562,0 +76,1,10.179073263049672,0 +76,2,10.3412314399433,0 +76,3,12.100364325243033,0 +76,4,11.87252325307866,0 +76,5,10.865777570362761,0 +76,6,10.989860634051686,0 +76,7,10.697050280610938,0 +76,8,10.417587072592454,0 +76,9,11.413212186000825,0 +77,0,12.113077491791637,0 +77,1,12.669578217582645,0 +77,2,12.011900558891911,0 +77,3,12.531016254258521,0 +77,4,13.086317031065066,0 +77,5,13.295108282305618,0 +77,6,13.420739462784656,0 +77,7,12.65175899133513,0 +77,8,13.839346348481431,0 +77,9,13.111897635112726,0 +78,0,10.755193960031367,0 +78,1,10.533445664418617,0 +78,2,9.95057125318874,0 +78,3,10.58066615043712,0 +78,4,11.021446873759597,0 +78,5,11.245247692965012,0 +78,6,10.968271054005504,0 +78,7,11.90465444802824,0 +78,8,11.742520388220994,0 +78,9,11.210437944527838,0 +79,0,9.553206444400999,0 +79,1,10.245042280926619,0 +79,2,10.358387838724038,0 +79,3,8.864145597921278,0 +79,4,9.11487898275395,0 +79,5,10.420604931699451,0 +79,6,9.249005012913706,0 +79,7,9.216874971686686,0 +79,8,9.735048472983003,0 +79,9,10.28825889306982,0 +80,0,12.16137906681423,0 +80,1,11.987295609916982,0 +80,2,12.075471189257524,0 +80,3,12.156408887913425,0 +80,4,12.172822605790307,0 +80,5,13.853393858757808,0 +80,6,12.961332955712555,0 +80,7,13.355956947322147,0 +80,8,12.768038686060317,0 +80,9,13.019599081526755,0 +81,0,10.500735886238049,0 +81,1,9.631658709463625,0 +81,2,10.368508238499293,0 +81,3,10.252203536933031,0 +81,4,10.132620987947424,0 +81,5,10.759833389774176,0 +81,6,12.119990716614042,0 +81,7,11.53529479307105,0 +81,8,12.302794758083815,0 +81,9,11.963004165373665,0 +82,0,13.462791396849564,0 +82,1,12.721063486339697,0 +82,2,13.533487461088319,0 +82,3,13.735869101029158,0 +82,4,13.183188337875137,0 +82,5,13.851282169300067,0 +82,6,13.9546058632794,0 +82,7,13.496735673426942,0 +82,8,13.310127767856487,0 +82,9,13.379447818042848,0 +83,0,10.409721645404543,0 +83,1,10.62997380883847,0 +83,2,10.170205959214854,0 +83,3,10.679761545106862,0 +83,4,11.051737392513056,0 +83,5,11.175564231776463,0 +83,6,11.742408934505058,0 +83,7,11.704959290542833,0 +83,8,10.66582315806554,0 +83,9,12.108261979212823,0 +84,0,7.285573684281142,0 +84,1,8.173498342786214,0 +84,2,7.785055249584545,0 +84,3,7.644987247330516,0 +84,4,7.047213506671534,0 +84,5,7.965279681450826,0 +84,6,7.371370318906195,0 +84,7,8.734711554480386,0 +84,8,9.109458265128625,0 +84,9,8.052606441910655,0 +85,0,7.414666972768306,0 +85,1,7.000985840504683,0 +85,2,7.1494392364402755,0 +85,3,7.950928953441356,0 +85,4,7.644412470586965,0 +85,5,8.633505622474177,0 +85,6,7.62256536215721,0 +85,7,8.463349767745916,0 +85,8,8.112388307987562,0 +85,9,8.561570606380787,0 +86,0,13.078277035797527,0 +86,1,13.377076445686708,0 +86,2,13.44410642481303,0 +86,3,13.183827307039607,0 +86,4,14.032896631753376,0 +86,5,14.126980367251681,0 +86,6,14.198633211915283,0 +86,7,14.77033869142132,0 +86,8,14.826528265080652,0 +86,9,14.014383519537931,0 +87,0,13.633857600783683,0 +87,1,13.235531096937487,0 +87,2,13.659009099623871,0 +87,3,13.447456717007162,0 +87,4,14.657366596134619,0 +87,5,14.129180516738074,0 +87,6,13.938316877514382,0 +87,7,14.74414072749478,0 +87,8,13.162023736668889,0 +87,9,13.34028014413395,0 +88,0,10.038880901044687,0 +88,1,9.771195797493483,0 +88,2,9.96618472356518,0 +88,3,9.277763521775526,0 +88,4,10.029344718009616,0 +88,5,11.14271627863547,0 +88,6,10.65306138333273,0 +88,7,10.425814846006205,0 +88,8,10.22143461838233,0 +88,9,10.343675147500102,0 +89,0,8.169026413128016,0 +89,1,9.462445568188604,0 +89,2,9.856366911968951,0 +89,3,8.745815694142719,0 +89,4,9.302916911574616,0 +89,5,9.154893041611313,0 +89,6,9.350949760323642,0 +89,7,9.960366171138974,0 +89,8,8.991424280233788,0 +89,9,10.440760576906557,0 +90,0,13.299816119451245,3 +90,1,12.897337290465948,3 +90,2,11.882534329492133,3 +90,3,14.724679271483211,3 +90,4,15.939339551535753,3 +90,5,14.033891444120119,3 +90,6,15.349126878387644,3 +90,7,15.032888195250774,3 +90,8,16.125173735276825,3 +90,9,15.485331608412674,3 +91,0,7.98788564628083,7 +91,1,8.16863884549757,7 +91,2,8.904021555778035,7 +91,3,7.98443766407534,7 +91,4,8.37393082072433,7 +91,5,7.543380896349307,7 +91,6,8.981115417559897,7 +91,7,10.105654211877342,7 +91,8,10.305531553869152,7 +91,9,10.67478473848842,7 +92,0,7.429103789418422,5 +92,1,8.424930976552815,5 +92,2,8.894279098550852,5 +92,3,8.657012873836994,5 +92,4,7.816097179998985,5 +92,5,10.648641314923516,5 +92,6,11.181560413578092,5 +92,7,9.971551579105391,5 +92,8,10.47316295517949,5 +92,9,11.548817764129106,5 +93,0,11.421701306476733,5 +93,1,11.592639380441465,5 +93,2,12.416400993189743,5 +93,3,11.820929978631192,5 +93,4,12.015597594594679,5 +93,5,15.142026352987251,5 +93,6,13.779751310332843,5 +93,7,13.58771220756884,5 +93,8,14.300551788501656,5 +93,9,14.406149104072536,5 +94,0,8.947432058307717,5 +94,1,8.794591244503398,5 +94,2,10.565711965325859,5 +94,3,9.674211489474834,5 +94,4,9.244647063579784,5 +94,5,12.16875490542062,5 +94,6,11.756217178500153,5 +94,7,11.784349031121904,5 +94,8,12.291926190238069,5 +94,9,11.464848620164977,5 +95,0,10.166069868518443,7 +95,1,11.046233670425732,7 +95,2,10.422845097658511,7 +95,3,10.101650214668124,7 +95,4,10.136203626345736,7 +95,5,10.284586279887302,7 +95,6,10.462774927535966,7 +95,7,12.50085300018072,7 +95,8,13.24791198709714,7 +95,9,12.023151714955365,7 +96,0,10.104114228157043,3 +96,1,9.583270940379695,3 +96,2,10.06158809579866,3 +96,3,11.409620247850544,3 +96,4,12.737429736375693,3 +96,5,12.795931683036912,3 +96,6,12.752909465826011,3 +96,7,11.615736815054206,3 +96,8,12.887674738484002,3 +96,9,12.77554174676883,3 +97,0,9.872513474031583,7 +97,1,10.763000405712459,7 +97,2,11.057416342114497,7 +97,3,11.253186734509443,7 +97,4,11.163779781072295,7 +97,5,11.320764806021558,7 +97,6,12.011954793724414,7 +97,7,13.988166052553206,7 +97,8,12.041621620706495,7 +97,9,13.416429820179026,7 +98,0,12.732638208855459,3 +98,1,12.038914436263948,3 +98,2,13.062055251743589,3 +98,3,15.739342608832688,3 +98,4,14.671679792832139,3 +98,5,15.48316419197237,3 +98,6,14.957051983979214,3 +98,7,15.179007710787594,3 +98,8,16.353660539712454,3 +98,9,15.443684812571522,3 +99,0,10.412276058308803,3 +99,1,9.345560755772997,3 +99,2,11.312328594500931,3 +99,3,12.782496036956742,3 +99,4,12.48991989247836,3 +99,5,12.968943068803439,3 +99,6,12.076260099329586,3 +99,7,13.472435184679819,3 +99,8,12.819177426055276,3 +99,9,12.979135056279274,3 +100,0,11.042049779070974,5 +100,1,11.097551885180923,5 +100,2,11.835212892367919,5 +100,3,11.715831011605827,5 +100,4,12.013751657255236,5 +100,5,13.78348024181093,5 +100,6,14.144477492092307,5 +100,7,13.778453214591195,5 +100,8,15.202743507258491,5 +100,9,13.403912378338466,5 +101,0,6.530124459719447,7 +101,1,6.68360952329844,7 +101,2,5.673576394955158,7 +101,3,7.314292603215941,7 +101,4,6.797400939410615,7 +101,5,6.8852253013494105,7 +101,6,6.513666891450893,7 +101,7,8.491042718828618,7 +101,8,8.59495138441815,7 +101,9,8.994921096097714,7 +102,0,10.603156333656713,7 +102,1,10.093220305070474,7 +102,2,9.646841384571118,7 +102,3,10.614776727332256,7 +102,4,10.763138569337162,7 +102,5,10.729892960363658,7 +102,6,10.875312649402256,7 +102,7,12.46709913106086,7 +102,8,12.238068612574475,7 +102,9,12.368466480819375,7 +103,0,7.495807835185329,7 +103,1,8.532013765187232,7 +103,2,8.47181506780096,7 +103,3,8.393545607149404,7 +103,4,7.235730047241724,7 +103,5,8.189880724567173,7 +103,6,9.473959785115841,7 +103,7,10.681226459382692,7 +103,8,11.29383529824268,7 +103,9,10.51819674442663,7 +104,0,8.28172662033236,7 +104,1,7.5903600491254055,7 +104,2,7.907394077394512,7 +104,3,9.025263775093586,7 +104,4,8.720593929447446,7 +104,5,7.909091068509215,7 +104,6,7.865763786717564,7 +104,7,10.099601373392979,7 +104,8,10.617323003391013,7 +104,9,10.409761812027309,7 +105,0,8.046073193640764,7 +105,1,9.07728589149702,7 +105,2,8.38659032025358,7 +105,3,9.69348981266116,7 +105,4,8.689066548018586,7 +105,5,9.636354463254863,7 +105,6,9.022354977497812,7 +105,7,11.425175552767222,7 +105,8,10.681389740383638,7 +105,9,10.716815654848794,7 +106,0,9.56160609610587,5 +106,1,9.926029739831034,5 +106,2,9.551881369598608,5 +106,3,8.988556451056528,5 +106,4,9.051623935453714,5 +106,5,11.118184917505255,5 +106,6,11.047942062128685,5 +106,7,12.08649579832192,5 +106,8,12.694275079452389,5 +106,9,12.225045225135972,5 +107,0,10.701597706043101,3 +107,1,11.836424854544752,3 +107,2,11.950405586395723,3 +107,3,15.179804459824748,3 +107,4,13.503236128252249,3 +107,5,14.136466283412428,3 +107,6,15.284731424984308,3 +107,7,14.7567942553853,3 +107,8,14.91346162147024,3 +107,9,14.789097303223967,3 +108,0,7.512186333849504,7 +108,1,7.8360193296601395,7 +108,2,7.142471405074489,7 +108,3,8.086243853974972,7 +108,4,7.060713056078103,7 +108,5,8.011398550936642,7 +108,6,8.036585046164035,7 +108,7,9.739068020424817,7 +108,8,10.13587192685805,7 +108,9,9.935944488188095,7 +109,0,9.525412321757962,5 +109,1,9.984455614382012,5 +109,2,9.709268667503004,5 +109,3,10.52266171723546,5 +109,4,9.886220238112653,5 +109,5,12.91725149668436,5 +109,6,12.070523451224425,5 +109,7,12.47811552739833,5 +109,8,12.548961281081246,5 +109,9,13.623836349626254,5 +110,0,9.196806106141139,5 +110,1,9.025794232060646,5 +110,2,9.481020853867705,5 +110,3,8.278064959668,5 +110,4,9.41009433851239,5 +110,5,12.530418222235225,5 +110,6,11.69783438653418,5 +110,7,11.765617717917511,5 +110,8,12.105725323153942,5 +110,9,12.379463754990017,5 +111,0,8.34592046483115,5 +111,1,8.603490570380565,5 +111,2,9.649799456819151,5 +111,3,9.287215530327325,5 +111,4,10.400666442386036,5 +111,5,11.687066616333466,5 +111,6,10.97738844341952,5 +111,7,11.410646852854079,5 +111,8,10.76981580146079,5 +111,9,12.127591042634622,5 +112,0,11.911323586977016,3 +112,1,11.893284295718258,3 +112,2,12.744955170124078,3 +112,3,14.904775568867557,3 +112,4,14.320321438931153,3 +112,5,13.43816881687078,3 +112,6,14.914156440855336,3 +112,7,14.983195764863495,3 +112,8,14.96021143568768,3 +112,9,15.396255525263266,3 +113,0,10.493816825777332,7 +113,1,12.180608567100421,7 +113,2,10.796516997436918,7 +113,3,11.75084233760828,7 +113,4,10.632557026801177,7 +113,5,11.111098530372827,7 +113,6,12.43441519737052,7 +113,7,13.645780962507427,7 +113,8,14.296192176574252,7 +113,9,14.094708568367475,7 +114,0,12.622317136885401,7 +114,1,13.456721377351831,7 +114,2,12.756466802559537,7 +114,3,12.084427161103754,7 +114,4,12.673176168909636,7 +114,5,13.450392079236336,7 +114,6,13.472550768066741,7 +114,7,14.591267089767769,7 +114,8,15.112624955181388,7 +114,9,15.283935540284364,7 +115,0,9.508253844443088,5 +115,1,9.103031847199423,5 +115,2,9.57680308744046,5 +115,3,9.626683398870437,5 +115,4,10.47675527138116,5 +115,5,12.59554929699532,5 +115,6,12.21808341245579,5 +115,7,12.194968743564834,5 +115,8,12.560950797519757,5 +115,9,12.182701087772413,5 +116,0,8.034698735574873,5 +116,1,8.238004172195735,5 +116,2,8.490708194672864,5 +116,3,8.326377876762459,5 +116,4,9.089144556190032,5 +116,5,11.472972613550448,5 +116,6,10.414565626259472,5 +116,7,11.32067913226661,5 +116,8,11.298146879025428,5 +116,9,11.678869119426995,5 +117,0,9.545812982187208,7 +117,1,8.925482726273186,7 +117,2,9.121087716810418,7 +117,3,9.866994154031488,7 +117,4,10.466672826111758,7 +117,5,8.9451886807175,7 +117,6,10.32514565613979,7 +117,7,12.847093368882822,7 +117,8,12.275769428135895,7 +117,9,12.727383614252222,7 +118,0,10.133459042093543,5 +118,1,10.525310482904006,5 +118,2,9.874536823388368,5 +118,3,9.612821928333007,5 +118,4,11.356376483155081,5 +118,5,12.492496118680222,5 +118,6,12.857229660040728,5 +118,7,12.491772455907347,5 +118,8,12.580008731000508,5 +118,9,13.298074732495136,5 +119,0,9.988358119839598,5 +119,1,10.64213221590477,5 +119,2,10.779415926397217,5 +119,3,10.813571540069725,5 +119,4,10.872447112371644,5 +119,5,13.642030819373364,5 +119,6,12.419349832384977,5 +119,7,13.025679283805962,5 +119,8,13.719491627167216,5 +119,9,13.88541662157644,5 +120,0,7.12034203086098,5 +120,1,7.775046134705332,5 +120,2,8.710284403466897,5 +120,3,8.3852634081328,5 +120,4,8.328855371273153,5 +120,5,10.222381508815939,5 +120,6,10.239323392679394,5 +120,7,10.675333387078428,5 +120,8,10.505036793041025,5 +120,9,10.709934545011045,5 +121,0,10.220278916313267,3 +121,1,9.941943799008998,3 +121,2,10.647044250196467,3 +121,3,13.938101887463459,3 +121,4,11.692486243592711,3 +121,5,12.842985825350738,3 +121,6,12.400884518946295,3 +121,7,13.169680852015373,3 +121,8,13.00425829991968,3 +121,9,13.46190695027698,3 +122,0,11.0442099485322,3 +122,1,10.6526672191619,3 +122,2,11.2224828044849,3 +122,3,11.584307713383472,3 +122,4,13.651682809235822,3 +122,5,13.14398773622398,3 +122,6,13.5953783381504,3 +122,7,12.54651878785705,3 +122,8,12.841071719475144,3 +122,9,13.429425807350198,3 +123,0,15.02642913039911,5 +123,1,15.276482369072836,5 +123,2,15.777205655532825,5 +123,3,14.452337917416418,5 +123,4,15.921857610926496,5 +123,5,17.539089030046807,5 +123,6,17.533057144505566,5 +123,7,16.999390457875396,5 +123,8,18.086472409256274,5 +123,9,16.727894149552327,5 +124,0,14.077886228182695,7 +124,1,13.107636304826844,7 +124,2,13.980418178872076,7 +124,3,13.994104803899381,7 +124,4,14.738458296210283,7 +124,5,14.164361638056395,7 +124,6,14.006619941990508,7 +124,7,16.20038787457774,7 +124,8,17.190130606940674,7 +124,9,16.921837707153788,7 +125,0,8.100698308120778,3 +125,1,8.275290088900103,3 +125,2,7.709924753589635,3 +125,3,10.78775039930813,3 +125,4,9.813605445034627,3 +125,5,11.169411531800087,3 +125,6,10.957824095127993,3 +125,7,12.18698919866932,3 +125,8,10.543445522020207,3 +125,9,11.142182253362044,3 +126,0,10.324377127363178,7 +126,1,10.120928097047354,7 +126,2,9.277127251111697,7 +126,3,9.837216991088372,7 +126,4,9.826232955109191,7 +126,5,10.066211210598725,7 +126,6,9.675893058909232,7 +126,7,12.080573075551278,7 +126,8,11.42620708926329,7 +126,9,12.392795002835689,7 +127,0,7.29812633303691,7 +127,1,7.059406290004051,7 +127,2,6.4655905510336,7 +127,3,7.867381927751725,7 +127,4,7.655378515322395,7 +127,5,7.2101084474755055,7 +127,6,7.532815772906962,7 +127,7,9.022781294935076,7 +127,8,9.360410932437041,7 +127,9,9.704210323147425,7 +128,0,9.432796378213103,3 +128,1,8.598834276629288,3 +128,2,9.338692801737405,3 +128,3,10.849427870642238,3 +128,4,11.342559752498964,3 +128,5,11.247888221001126,3 +128,6,11.855114272119398,3 +128,7,11.251180302963748,3 +128,8,10.832321847071807,3 +128,9,11.027611456391211,3 +129,0,10.365271260209301,5 +129,1,10.106680021841653,5 +129,2,11.036316015020455,5 +129,3,10.698325924751643,5 +129,4,12.218513282193896,5 +129,5,12.262352709612115,5 +129,6,13.33302659653558,5 +129,7,12.767233495427845,5 +129,8,13.184509336989342,5 +129,9,13.601919743850953,5 +130,0,11.249021554339324,3 +130,1,12.63012030017048,3 +130,2,13.56316097291355,3 +130,3,14.484988092592172,3 +130,4,14.941611170179222,3 +130,5,15.424168088339167,3 +130,6,14.92973049176297,3 +130,7,14.821227147510724,3 +130,8,14.073839039915592,3 +130,9,15.632202601473955,3 +131,0,8.485248119648334,7 +131,1,8.135490398818702,7 +131,2,8.805130679651628,7 +131,3,8.549992146109693,7 +131,4,8.67761925895502,7 +131,5,9.344977902567136,7 +131,6,8.47264111885086,7 +131,7,11.76548996754966,7 +131,8,10.64298924295358,7 +131,9,11.11048162033515,7 +132,0,9.653180216897695,7 +132,1,9.355384500685618,7 +132,2,8.940509116112127,7 +132,3,8.5214420121138,7 +132,4,8.82707096651446,7 +132,5,9.228344777157256,7 +132,6,9.310468736034206,7 +132,7,11.346271655051424,7 +132,8,11.476290862822315,7 +132,9,11.484519565011851,7 +133,0,4.9844325727026835,5 +133,1,5.547166569442274,5 +133,2,6.362733967516114,5 +133,3,5.321254791747132,5 +133,4,6.010644184753051,5 +133,5,7.795544976137268,5 +133,6,8.361097155221746,5 +133,7,8.21110534998928,5 +133,8,9.244118773010026,5 +133,9,8.858713517617817,5 +134,0,9.01876657602158,3 +134,1,10.110282164717034,3 +134,2,9.487342230779152,3 +134,3,11.68616997322142,3 +134,4,12.231577668963746,3 +134,5,11.928794449045117,3 +134,6,12.721096067071663,3 +134,7,11.987413128153486,3 +134,8,12.165194882614504,3 +134,9,12.246482712136132,3 +135,0,7.592351802103355,7 +135,1,8.52476695405478,7 +135,2,8.389569323086025,7 +135,3,9.030143153286911,7 +135,4,9.408993278230502,7 +135,5,7.859812660314704,7 +135,6,8.510380739944518,7 +135,7,9.987071017959156,7 +135,8,10.28636821240674,7 +135,9,10.46012992115304,7 +136,0,8.412543708198715,5 +136,1,8.515177508818645,5 +136,2,9.068978956007724,5 +136,3,8.63276193607465,5 +136,4,9.762184624836669,5 +136,5,10.982244376559095,5 +136,6,12.945905630316048,5 +136,7,11.415428919785654,5 +136,8,11.77712996622638,5 +136,9,11.193450553034834,5 +137,0,9.099324322728643,7 +137,1,8.906028784066281,7 +137,2,8.594736379032879,7 +137,3,7.872877169275042,7 +137,4,8.751611882527135,7 +137,5,8.185562920656198,7 +137,6,9.577456075401503,7 +137,7,11.35087580959141,7 +137,8,11.093997962908693,7 +137,9,9.928090612041164,7 +138,0,9.31976690216537,7 +138,1,10.255355457294822,7 +138,2,10.617916737356463,7 +138,3,10.666059797240951,7 +138,4,9.415468390374159,7 +138,5,10.462251044901338,7 +138,6,10.91923249973635,7 +138,7,11.682307108703762,7 +138,8,12.607522633808268,7 +138,9,12.848396090649114,7 +139,0,6.33416092486337,7 +139,1,5.6584622895861365,7 +139,2,6.521694162249873,7 +139,3,6.6208905021592255,7 +139,4,6.810507015012861,7 +139,5,6.702807504574203,7 +139,6,6.8630371406660045,7 +139,7,9.16469007611325,7 +139,8,9.505770519771593,7 +139,9,9.232329835858403,7 +140,0,6.8610850450340175,7 +140,1,7.212086528345247,7 +140,2,6.305451546270428,7 +140,3,7.604427072831247,7 +140,4,7.46562796065654,7 +140,5,7.585756272516799,7 +140,6,8.14627666301525,7 +140,7,8.789501744193895,7 +140,8,8.878992574288535,7 +140,9,10.102811431879779,7 +141,0,14.254935796492338,3 +141,1,14.443582695652562,3 +141,2,14.227430686470372,3 +141,3,16.890671061176562,3 +141,4,17.124813645552155,3 +141,5,17.00048537103236,3 +141,6,16.728079994854305,3 +141,7,15.904649530249392,3 +141,8,17.44311232807516,3 +141,9,17.769358557165393,3 +142,0,7.747434043443928,5 +142,1,7.085333623768184,5 +142,2,7.962052034285817,5 +142,3,8.15541969143266,5 +142,4,8.604562961351482,5 +142,5,9.48709987533796,5 +142,6,9.911927545179868,5 +142,7,10.498790863023665,5 +142,8,10.253859873171018,5 +142,9,9.7664910364263,5 +143,0,8.013736985604073,5 +143,1,8.195759659998304,5 +143,2,8.050410291256222,5 +143,3,8.216221306680694,5 +143,4,8.077075892247857,5 +143,5,11.04556831094875,5 +143,6,9.956082751234382,5 +143,7,10.212768461708832,5 +143,8,10.611921599854485,5 +143,9,11.145905856941168,5 +144,0,12.925140471595359,5 +144,1,13.118678681759908,5 +144,2,12.854962802775269,5 +144,3,13.990174669269264,5 +144,4,13.514674148039356,5 +144,5,15.981552001633672,5 +144,6,16.60451615158848,5 +144,7,16.053055696251203,5 +144,8,16.58946806801431,5 +144,9,16.424158102766768,5 +145,0,16.146191496029033,3 +145,1,16.147698372154817,3 +145,2,15.66416749039351,3 +145,3,18.02580846178059,3 +145,4,18.732006037341062,3 +145,5,18.506822773607862,3 +145,6,17.71165330960525,3 +145,7,18.837943369066505,3 +145,8,18.067829027262697,3 +145,9,19.416390873461317,3 +146,0,8.168916162269829,5 +146,1,7.9018094803085175,5 +146,2,7.935483164067414,5 +146,3,7.852350614335607,5 +146,4,8.396128718049331,5 +146,5,9.971477657618287,5 +146,6,10.374651677669263,5 +146,7,10.792833925609042,5 +146,8,10.175963470586524,5 +146,9,10.24877037235397,5 +147,0,8.666319346061709,3 +147,1,8.831265713360853,3 +147,2,8.549304430703188,3 +147,3,11.928416480931704,3 +147,4,12.567959357465325,3 +147,5,12.476166558159091,3 +147,6,11.05045984211068,3 +147,7,12.325929422202039,3 +147,8,12.774555261385718,3 +147,9,11.935097096054607,3 +148,0,10.733834817206045,7 +148,1,9.779164576478683,7 +148,2,11.01647918586934,7 +148,3,10.45133123716366,7 +148,4,11.182928610836777,7 +148,5,10.40250964078658,7 +148,6,11.554540214119816,7 +148,7,13.895367439594363,7 +148,8,13.091521171023436,7 +148,9,13.014649811175802,7 +149,0,12.379383767536194,7 +149,1,13.424188521773596,7 +149,2,13.36659832167038,7 +149,3,12.981885582637158,7 +149,4,14.083904666967271,7 +149,5,13.73241009787886,7 +149,6,13.519527309488568,7 +149,7,15.880109085990961,7 +149,8,16.793047558275994,7 +149,9,16.658863760688796,7 +150,0,7.961272812394279,7 +150,1,8.435838352375951,7 +150,2,7.645829864950154,7 +150,3,8.342470893507041,7 +150,4,8.49994700838676,7 +150,5,8.291367588894161,7 +150,6,8.844015011122233,7 +150,7,10.336069115821298,7 +150,8,11.081196197031584,7 +150,9,11.202215228909171,7 +151,0,8.050864367767247,7 +151,1,10.204597159870838,7 +151,2,8.185628025439403,7 +151,3,9.501419795337718,7 +151,4,10.422118325244657,7 +151,5,9.85080094548956,7 +151,6,9.376351316881008,7 +151,7,11.662575777257445,7 +151,8,12.82289718127609,7 +151,9,11.448692042841461,7 +152,0,12.119170702604498,5 +152,1,11.627124102602343,5 +152,2,11.158477486902271,5 +152,3,11.563117615972144,5 +152,4,11.446062981572668,5 +152,5,14.487787752552753,5 +152,6,14.520889057004908,5 +152,7,13.395094585185465,5 +152,8,14.152879092863822,5 +152,9,14.02308232095135,5 +153,0,11.516287133162923,7 +153,1,10.294482589870025,7 +153,2,11.871646701275743,7 +153,3,11.15288477917477,7 +153,4,10.688564945743193,7 +153,5,11.030895056928598,7 +153,6,11.06872272854901,7 +153,7,12.932222947925483,7 +153,8,13.716945578798425,7 +153,9,13.155574373587315,7 +154,0,8.157504753170707,5 +154,1,9.157849622115714,5 +154,2,8.54625975933086,5 +154,3,9.171607423506043,5 +154,4,9.193136413238124,5 +154,5,11.312079821526895,5 +154,6,12.84816940833488,5 +154,7,11.975893641124587,5 +154,8,12.170373362037468,5 +154,9,11.259488709368656,5 +155,0,9.405372504149458,3 +155,1,9.940005385938674,3 +155,2,9.420089614511415,3 +155,3,12.486385492726912,3 +155,4,11.531005778017404,3 +155,5,12.08917100303533,3 +155,6,12.03932825098014,3 +155,7,12.514514400658875,3 +155,8,12.212109035040207,3 +155,9,12.49690725743245,3 +156,0,7.132143337371043,7 +156,1,6.983014504564385,7 +156,2,7.692755317508593,7 +156,3,7.6694025706787485,7 +156,4,7.605901410633453,7 +156,5,7.534979216529057,7 +156,6,8.248480398520034,7 +156,7,9.832291261352946,7 +156,8,9.896859739347068,7 +156,9,10.202553398204099,7 +157,0,10.371945853536094,5 +157,1,8.953474408040066,5 +157,2,10.108580958578345,5 +157,3,8.877066931431617,5 +157,4,10.032289091164982,5 +157,5,12.811193925855035,5 +157,6,12.367986774284322,5 +157,7,12.308965051320564,5 +157,8,12.043427777649132,5 +157,9,12.439176112294946,5 +158,0,9.341193612280412,3 +158,1,9.174236716016411,3 +158,2,9.522287212244654,3 +158,3,11.86641560029531,3 +158,4,11.48361648571677,3 +158,5,11.564084109639747,3 +158,6,11.387103301186002,3 +158,7,12.362587533026536,3 +158,8,12.53988665096562,3 +158,9,12.483055242100349,3 +159,0,10.946925025616846,5 +159,1,10.608273597792875,5 +159,2,10.86034524793539,5 +159,3,10.916258492071295,5 +159,4,10.818569769543409,5 +159,5,12.698928865187689,5 +159,6,14.171416087148467,5 +159,7,12.938364289325758,5 +159,8,12.931400923692294,5 +159,9,13.581344690977566,5 +160,0,9.015272770009469,7 +160,1,8.286949788556804,7 +160,2,9.650684689643835,7 +160,3,9.142248274686551,7 +160,4,8.733880014754106,7 +160,5,9.983737619538903,7 +160,6,9.802183348160044,7 +160,7,10.957074566388009,7 +160,8,12.57793698249083,7 +160,9,12.118744246545065,7 +161,0,10.937978987277756,3 +161,1,11.838186447901169,3 +161,2,11.222725937005098,3 +161,3,13.229035507538264,3 +161,4,13.012926987079284,3 +161,5,13.734002264743818,3 +161,6,13.781378528752269,3 +161,7,13.650417115035134,3 +161,8,13.348083868156477,3 +161,9,13.908041972173317,3 +162,0,11.990532242758588,5 +162,1,11.458694476272557,5 +162,2,11.489525459977564,5 +162,3,12.608008325044542,5 +162,4,11.124487478113814,5 +162,5,14.628136899120287,5 +162,6,14.386390106762745,5 +162,7,15.379554939079611,5 +162,8,14.578697531062721,5 +162,9,15.255173742765606,5 +163,0,10.112401734718038,3 +163,1,10.467732732080004,3 +163,2,10.879276920796958,3 +163,3,12.379580797174643,3 +163,4,13.209136097586136,3 +163,5,12.31400971678369,3 +163,6,13.058155076616123,3 +163,7,13.472178516730859,3 +163,8,13.4408774012321,3 +163,9,14.169081093704639,3 +164,0,10.321793158143077,7 +164,1,10.113938191124113,7 +164,2,10.406345262550847,7 +164,3,11.333734100072625,7 +164,4,11.352474283933097,7 +164,5,11.254641146622115,7 +164,6,10.742034292244842,7 +164,7,13.786946463462936,7 +164,8,12.891160427973137,7 +164,9,14.182987607986856,7 +165,0,11.535029906725667,5 +165,1,10.389536268441509,5 +165,2,10.597864762984605,5 +165,3,10.159880810707955,5 +165,4,9.601204034612326,5 +165,5,12.304951013640624,5 +165,6,12.15986307460636,5 +165,7,12.082808406948226,5 +165,8,13.76749066615356,5 +165,9,13.14274195416862,5 +166,0,10.190012576489819,3 +166,1,9.945657603584703,3 +166,2,10.113733244961109,3 +166,3,12.782444127302496,3 +166,4,12.384000226835221,3 +166,5,12.765515755195194,3 +166,6,12.50216663726032,3 +166,7,12.529092340816803,3 +166,8,12.096711703381898,3 +166,9,11.911294254999264,3 +167,0,8.426102982381105,3 +167,1,8.68513227891092,3 +167,2,8.113818914559909,3 +167,3,10.83964650039699,3 +167,4,10.983760214409557,3 +167,5,9.978879929884615,3 +167,6,10.699321603174553,3 +167,7,11.460515879782038,3 +167,8,11.550413297612177,3 +167,9,10.880046474714424,3 +168,0,10.712212548596485,5 +168,1,11.244192136026852,5 +168,2,11.221525002842066,5 +168,3,10.88534562983965,5 +168,4,10.79900036668447,5 +168,5,12.818316281179365,5 +168,6,13.466898414801445,5 +168,7,13.43054050991806,5 +168,8,13.747690526397717,5 +168,9,13.598925429907194,5 +169,0,10.214808778346741,7 +169,1,9.95272306965187,7 +169,2,10.397771161238863,7 +169,3,10.540838188356341,7 +169,4,9.803163488060614,7 +169,5,9.401354779737648,7 +169,6,11.177173231180296,7 +169,7,12.63375616852915,7 +169,8,11.74818385542496,7 +169,9,12.348517054563665,7 +170,0,14.572273764046106,7 +170,1,14.146498202294744,7 +170,2,14.130305908201626,7 +170,3,15.186777548017675,7 +170,4,14.062693044669286,7 +170,5,14.287259938983773,7 +170,6,14.892029855385324,7 +170,7,17.081564083888132,7 +170,8,17.234508135031557,7 +170,9,17.541955640145666,7 +171,0,13.245999115291243,5 +171,1,13.491401774419689,5 +171,2,13.217105175292943,5 +171,3,13.478058956568843,5 +171,4,13.600476894977302,5 +171,5,15.754258872860003,5 +171,6,16.132979708741864,5 +171,7,16.1242826459229,5 +171,8,15.968818306400577,5 +171,9,15.642146812479965,5 +172,0,10.805528734144357,3 +172,1,10.857684212665129,3 +172,2,11.0682838950196,3 +172,3,14.174955758683204,3 +172,4,12.909806715842384,3 +172,5,13.482837614675681,3 +172,6,13.90592413873515,3 +172,7,13.520694062162693,3 +172,8,14.740615664699085,3 +172,9,13.482673459247264,3 +173,0,8.63593019177049,7 +173,1,8.174482708163975,7 +173,2,8.498123213038818,7 +173,3,8.898352535808796,7 +173,4,9.297037611764331,7 +173,5,9.133432997622252,7 +173,6,9.703443005277833,7 +173,7,11.12640292531413,7 +173,8,11.107801315690963,7 +173,9,12.72587813425016,7 +174,0,7.97229588075418,5 +174,1,7.51543628304389,5 +174,2,7.62804517490913,5 +174,3,7.603837098767494,5 +174,4,8.643461733961557,5 +174,5,10.092988409586544,5 +174,6,10.438858293861408,5 +174,7,10.44342271881403,5 +174,8,10.94092326000363,5 +174,9,10.251805009591154,5 +175,0,11.844499763106223,7 +175,1,12.594434559768526,7 +175,2,12.882765548527678,7 +175,3,12.84855206272655,7 +175,4,12.377559041629066,7 +175,5,13.221028327223427,7 +175,6,12.610355442629912,7 +175,7,14.821516583784538,7 +175,8,14.71030914381673,7 +175,9,15.932865080503746,7 +176,0,10.842209220807952,3 +176,1,10.927995138133713,3 +176,2,11.349104160865501,3 +176,3,14.23182307441364,3 +176,4,12.0708853983392,3 +176,5,12.656310159855723,3 +176,6,12.817410908207014,3 +176,7,12.80008863910293,3 +176,8,12.786706826492422,3 +176,9,13.275671918266594,3 +177,0,10.351984949138519,3 +177,1,11.227807990551927,3 +177,2,11.186135967497528,3 +177,3,13.50080113916505,3 +177,4,13.614151032602349,3 +177,5,13.76179023349656,3 +177,6,13.37358233837128,3 +177,7,14.720858508100054,3 +177,8,13.877502365024272,3 +177,9,12.929044430470825,3 +178,0,6.0096401531685055,7 +178,1,5.740576531537706,7 +178,2,6.719495340627268,7 +178,3,6.749925414024947,7 +178,4,6.676615177101886,7 +178,5,8.016291082251872,7 +178,6,7.385319463090069,7 +178,7,9.573877206774782,7 +178,8,8.845291282080561,7 +178,9,8.911255580386108,7 +179,0,11.873279712664838,7 +179,1,12.041380532942473,7 +179,2,11.126721179559132,7 +179,3,12.064723451691595,7 +179,4,12.261913004553113,7 +179,5,12.247302380230796,7 +179,6,12.048181756943022,7 +179,7,14.285109372155791,7 +179,8,14.566447366329964,7 +179,9,14.800276974921385,7 +180,0,11.21123646770401,5 +180,1,11.54926877890416,5 +180,2,10.9616668146651,5 +180,3,10.980762578371047,5 +180,4,11.527668513195879,5 +180,5,13.714263134634153,5 +180,6,13.291740459799282,5 +180,7,14.004037414911812,5 +180,8,14.198633347006714,5 +180,9,14.454683909258064,5 +181,0,8.668299084530956,7 +181,1,7.445930561841324,7 +181,2,7.9860877358373,7 +181,3,8.084777479480374,7 +181,4,8.13010184690479,7 +181,5,7.954507171365976,7 +181,6,7.636357629555508,7 +181,7,10.525478354185545,7 +181,8,10.19496080139709,7 +181,9,10.722135371544857,7 +182,0,8.659928614250674,7 +182,1,8.60712121217793,7 +182,2,9.37409956528855,7 +182,3,9.43241114850335,7 +182,4,9.74659137076057,7 +182,5,10.649882268308914,7 +182,6,9.795595150705525,7 +182,7,11.999491268632166,7 +182,8,11.31816972066644,7 +182,9,11.313496664831915,7 +183,0,11.318849132099707,5 +183,1,10.165694518746388,5 +183,2,10.205462372978719,5 +183,3,10.807884948104764,5 +183,4,10.882338202024346,5 +183,5,12.576181155491394,5 +183,6,12.974947799356489,5 +183,7,13.842122267538091,5 +183,8,12.924437604297141,5 +183,9,12.730598166529955,5 +184,0,10.576230407162383,7 +184,1,10.451082052681505,7 +184,2,9.994674286388156,7 +184,3,9.613850088649029,7 +184,4,10.132621918493205,7 +184,5,10.577581378287851,7 +184,6,11.287769641654396,7 +184,7,12.286642609277656,7 +184,8,12.862673655956751,7 +184,9,13.342505926659936,7 +185,0,8.22502395833193,3 +185,1,10.032550712362339,3 +185,2,9.02443234228967,3 +185,3,11.881655375121356,3 +185,4,11.93899920319826,3 +185,5,12.457950824124811,3 +185,6,12.29111981883966,3 +185,7,10.927012382527899,3 +185,8,11.823268004814894,3 +185,9,12.38476019306953,3 +186,0,9.695533520808185,3 +186,1,10.131828828746462,3 +186,2,10.38770674170293,3 +186,3,12.397249332082918,3 +186,4,12.085885992590846,3 +186,5,11.369854958307839,3 +186,6,12.16528283944067,3 +186,7,12.701207537937554,3 +186,8,12.04779983759284,3 +186,9,13.008660828928866,3 +187,0,9.950241892219882,7 +187,1,9.65947873173415,7 +187,2,8.164155310430031,7 +187,3,9.157001062230329,7 +187,4,10.83913982286427,7 +187,5,9.72277634707204,7 +187,6,9.10397363202173,7 +187,7,12.813668937741365,7 +187,8,12.302723039358066,7 +187,9,12.948515488065093,7 +188,0,10.44360436181462,5 +188,1,10.634404592311022,5 +188,2,10.686629648062194,5 +188,3,10.953869180165437,5 +188,4,11.843840248482987,5 +188,5,13.290628192184531,5 +188,6,12.114955365282169,5 +188,7,13.228455781844389,5 +188,8,13.675865408256547,5 +188,9,13.80647542747301,5 +189,0,12.729681454456763,3 +189,1,13.625456168580376,3 +189,2,13.606855145761916,3 +189,3,16.52924266447118,3 +189,4,14.967983077862058,3 +189,5,15.1892836577861,3 +189,6,15.389924421756335,3 +189,7,15.397872870966925,3 +189,8,15.297973571104512,3 +189,9,14.461646193149441,3 +190,0,4.649333381275838,7 +190,1,5.151362147987356,7 +190,2,4.82007219538445,7 +190,3,6.1343737099019595,7 +190,4,5.2733710244953365,7 +190,5,6.012197645729472,7 +190,6,5.59739332450239,7 +190,7,7.979886773084034,7 +190,8,7.806303895104186,7 +190,9,8.451075434089187,7 +191,0,9.554269146046833,3 +191,1,9.874359825025024,3 +191,2,9.909465937805653,3 +191,3,11.682156741916973,3 +191,4,12.128790630442971,3 +191,5,13.143492594079474,3 +191,6,11.991100999932911,3 +191,7,12.436031917407256,3 +191,8,12.955188946415355,3 +191,9,12.996426111452772,3 +192,0,10.358883051604577,7 +192,1,9.261330491266213,7 +192,2,10.811376252069126,7 +192,3,9.543287713433369,7 +192,4,11.48799345821551,7 +192,5,10.937761778341525,7 +192,6,11.452861852680888,7 +192,7,12.601969273654367,7 +192,8,12.376771106094868,7 +192,9,13.383402034815477,7 +193,0,10.575186259396128,3 +193,1,10.446217566295616,3 +193,2,10.83431861974058,3 +193,3,14.27570592539033,3 +193,4,13.409250753058728,3 +193,5,13.61673728230586,3 +193,6,13.613956425150876,3 +193,7,14.031719051300295,3 +193,8,13.296952142923827,3 +193,9,12.464607325536553,3 +194,0,9.161877770854415,7 +194,1,9.76203869235567,7 +194,2,8.971270639799883,7 +194,3,9.467987862112755,7 +194,4,9.310285965856263,7 +194,5,10.507185463278985,7 +194,6,10.2308993450607,7 +194,7,12.2819411476794,7 +194,8,12.366971493917907,7 +194,9,12.137546211081357,7 +195,0,6.585334576306587,7 +195,1,5.993964144776676,7 +195,2,5.5933354456675515,7 +195,3,6.861597000215347,7 +195,4,7.048551563178908,7 +195,5,7.591188719255523,7 +195,6,7.872224343084019,7 +195,7,9.971555677542309,7 +195,8,8.787544638395897,7 +195,9,9.21042391756795,7 +196,0,10.896158115368909,7 +196,1,12.021943306299306,7 +196,2,11.596040569285176,7 +196,3,11.51568954842741,7 +196,4,10.862096989653574,7 +196,5,10.714391151850656,7 +196,6,11.650444909119837,7 +196,7,12.805660335311037,7 +196,8,13.336772791938069,7 +196,9,13.498601992927258,7 +197,0,14.251156472889773,5 +197,1,13.37735177684853,5 +197,2,12.43507560904206,5 +197,3,14.006922752159603,5 +197,4,13.505189501776407,5 +197,5,15.976015940547242,5 +197,6,16.011348077907304,5 +197,7,16.017848430419484,5 +197,8,16.649870719220722,5 +197,9,15.588267989152017,5 +198,0,6.673770077690544,5 +198,1,7.374296146726414,5 +198,2,6.82523480249554,5 +198,3,6.427873835532738,5 +198,4,7.666554861796093,5 +198,5,9.993824505388494,5 +198,6,9.509779586346221,5 +198,7,8.651854390932957,5 +198,8,10.085262063517133,5 +198,9,9.00233324945788,5 +199,0,11.88205566582046,7 +199,1,11.383924115025296,7 +199,2,11.347736384512894,7 +199,3,11.806701018856533,7 +199,4,12.036007249182747,7 +199,5,11.905551902203152,7 +199,6,12.62200766650395,7 +199,7,14.204621338166959,7 +199,8,15.017751463232289,7 +199,9,14.768242610218117,7 +200,0,8.99417095045606,3 +200,1,9.00997961183807,3 +200,2,8.719201404230493,3 +200,3,11.788316142687625,3 +200,4,11.565655780128534,3 +200,5,10.656851789722456,3 +200,6,11.675092117369513,3 +200,7,11.592432539126737,3 +200,8,12.930788349839544,3 +200,9,11.391560247807558,3 +201,0,9.156675908475279,7 +201,1,10.532336129895155,7 +201,2,10.755732887074286,7 +201,3,9.804859838979485,7 +201,4,9.96130839693785,7 +201,5,10.685469006618963,7 +201,6,9.31380140986965,7 +201,7,11.646754466089527,7 +201,8,12.536984607916951,7 +201,9,13.552296591406868,7 +202,0,7.676337934508732,5 +202,1,8.044671374459679,5 +202,2,8.661097614874107,5 +202,3,9.247645345231176,5 +202,4,8.005778742017945,5 +202,5,10.385784616725388,5 +202,6,11.405230589263523,5 +202,7,9.833103340996386,5 +202,8,11.207993513869013,5 +202,9,10.285045212411474,5 +203,0,9.382323838839826,5 +203,1,9.309778131588109,5 +203,2,9.052081854937631,5 +203,3,9.13953669145911,5 +203,4,9.869294837415124,5 +203,5,12.328857319369588,5 +203,6,11.866219553693995,5 +203,7,12.066144234654855,5 +203,8,12.41667468843377,5 +203,9,11.745278268781767,5 +204,0,12.969935443604378,5 +204,1,13.10976073792779,5 +204,2,14.10190233877312,5 +204,3,12.712037094118125,5 +204,4,12.771048288563852,5 +204,5,15.484572125184789,5 +204,6,15.609641273853985,5 +204,7,15.03254561270956,5 +204,8,15.198507850932893,5 +204,9,15.770654608506938,5 +205,0,10.751518363608083,5 +205,1,11.060043489967907,5 +205,2,11.41010686765963,5 +205,3,11.963279826386543,5 +205,4,11.263011175885644,5 +205,5,12.744118252960078,5 +205,6,12.807011750216308,5 +205,7,13.235603950622346,5 +205,8,13.308726624875446,5 +205,9,13.98800510466546,5 +206,0,12.870183134021085,3 +206,1,13.213245730909286,3 +206,2,13.277580268328862,3 +206,3,15.544369280395891,3 +206,4,16.52587537658145,3 +206,5,16.295597005535146,3 +206,6,16.07268791845789,3 +206,7,15.934110070529467,3 +206,8,16.04800688250838,3 +206,9,16.52849644579586,3 +207,0,12.646908314631531,3 +207,1,12.001015024905087,3 +207,2,12.573064155762502,3 +207,3,13.639013993623873,3 +207,4,14.860680670040686,3 +207,5,15.23589839893473,3 +207,6,14.814789464514948,3 +207,7,15.963087276025439,3 +207,8,14.714204554237362,3 +207,9,15.363543985607018,3 +208,0,10.860260984526175,3 +208,1,10.928024524137449,3 +208,2,11.021012273856778,3 +208,3,13.231725872863889,3 +208,4,13.702152729591003,3 +208,5,14.067041475996525,3 +208,6,13.477048235607073,3 +208,7,13.985831393944968,3 +208,8,13.327289884725761,3 +208,9,14.161151084222269,3 +209,0,13.10687329042209,3 +209,1,14.133199020379651,3 +209,2,14.793189491032203,3 +209,3,15.72682710834989,3 +209,4,14.79159730693188,3 +209,5,15.713832585713174,3 +209,6,16.703806619480236,3 +209,7,16.46440744584821,3 +209,8,16.412325851673103,3 +209,9,16.892203230313005,3 +210,0,11.12021303710081,5 +210,1,11.586848911202166,5 +210,2,11.53769815580009,5 +210,3,11.938836543845078,5 +210,4,11.457384359429726,5 +210,5,13.830616018223575,5 +210,6,13.324422780933705,5 +210,7,13.277255911697095,5 +210,8,14.531370137686086,5 +210,9,13.886088655637943,5 +211,0,12.058416636525308,7 +211,1,11.817846360799813,7 +211,2,11.562718528134136,7 +211,3,11.96146779988532,7 +211,4,12.773973362477976,7 +211,5,12.43359639073383,7 +211,6,11.400239773728888,7 +211,7,14.24835412530604,7 +211,8,15.347123249946476,7 +211,9,14.44907792266914,7 +212,0,8.506618747063493,5 +212,1,10.453382535641856,5 +212,2,9.94805434783698,5 +212,3,9.671851663524185,5 +212,4,9.62114922488955,5 +212,5,12.448034057826451,5 +212,6,11.735866392817487,5 +212,7,12.680986305188577,5 +212,8,12.477955735792712,5 +212,9,12.429199055674259,5 +213,0,10.205849034386206,5 +213,1,10.14560761336132,5 +213,2,10.855911691823042,5 +213,3,10.000420215667493,5 +213,4,9.722129068303305,5 +213,5,12.940311693164647,5 +213,6,13.048710795751983,5 +213,7,13.073212207974292,5 +213,8,13.13695581652743,5 +213,9,12.872821442549636,5 +214,0,8.99889370559221,7 +214,1,8.682373290022175,7 +214,2,8.561595572991857,7 +214,3,8.749206697271747,7 +214,4,10.015472202787363,7 +214,5,8.237664312376731,7 +214,6,9.455046405547346,7 +214,7,10.465597139587588,7 +214,8,11.351035684237962,7 +214,9,11.288338610822354,7 +215,0,10.936653720849767,5 +215,1,12.273686664406586,5 +215,2,11.989148838093607,5 +215,3,11.229564697909083,5 +215,4,12.798951914576833,5 +215,5,15.196670958184173,5 +215,6,15.32423004033799,5 +215,7,14.85864608951544,5 +215,8,14.802196938478264,5 +215,9,15.543246179637604,5 +216,0,8.484584546440319,3 +216,1,8.310177105208082,3 +216,2,7.372623952655369,3 +216,3,10.04406518037773,3 +216,4,10.263150825915982,3 +216,5,9.238584269311653,3 +216,6,10.478073083580657,3 +216,7,11.479888666759964,3 +216,8,10.763360836670664,3 +216,9,11.280099294491146,3 +217,0,11.656158049899675,7 +217,1,11.628091522843413,7 +217,2,11.95533637987581,7 +217,3,11.456787360843107,7 +217,4,11.482813091218128,7 +217,5,12.697852963420006,7 +217,6,12.850871856268581,7 +217,7,13.769147527041646,7 +217,8,14.848700087112256,7 +217,9,14.421220697032313,7 +218,0,9.788602172234896,5 +218,1,9.948807204707212,5 +218,2,10.315873119051838,5 +218,3,9.960056985921275,5 +218,4,10.72377888264065,5 +218,5,12.273404304869644,5 +218,6,12.530790021224446,5 +218,7,11.902066589221057,5 +218,8,13.498027390342099,5 +218,9,12.753610346592165,5 +219,0,12.698992899607187,5 +219,1,11.638401281328868,5 +219,2,12.131143175395087,5 +219,3,13.261436030894005,5 +219,4,12.885939746080942,5 +219,5,13.885604319520176,5 +219,6,15.461639151426548,5 +219,7,14.770602784758276,5 +219,8,15.848152679872996,5 +219,9,15.19857634763279,5 +220,0,11.604091320472785,5 +220,1,12.191215266758345,5 +220,2,11.252991492035187,5 +220,3,11.305314573545552,5 +220,4,11.803488767263467,5 +220,5,14.415324377273132,5 +220,6,13.436299351155775,5 +220,7,13.36772067751797,5 +220,8,14.713598440984416,5 +220,9,14.100248821179019,5 +221,0,13.445179308833461,5 +221,1,12.81288711432352,5 +221,2,13.501802658117612,5 +221,3,13.937508666978028,5 +221,4,14.50648124410505,5 +221,5,15.755677232176286,5 +221,6,17.069980375409852,5 +221,7,15.9990143032037,5 +221,8,16.099746414501013,5 +221,9,17.309971091242502,5 +222,0,11.567279458459248,3 +222,1,11.285537017771501,3 +222,2,11.583982816686992,3 +222,3,14.43808617104832,3 +222,4,14.594650792090132,3 +222,5,13.553152792642162,3 +222,6,14.040332477237737,3 +222,7,14.206627796111077,3 +222,8,13.768429327018993,3 +222,9,14.333706855952293,3 +223,0,6.858580918919369,5 +223,1,7.526024228161886,5 +223,2,7.659632002849119,5 +223,3,6.991314789514014,5 +223,4,7.148942916763113,5 +223,5,9.688065639721337,5 +223,6,9.163325264230014,5 +223,7,9.85023956456222,5 +223,8,9.611966245008912,5 +223,9,9.271220199824208,5 +224,0,9.387675297763776,7 +224,1,10.68962300221915,7 +224,2,9.98788291463195,7 +224,3,10.986270701152755,7 +224,4,10.009275523846144,7 +224,5,10.511696254769346,7 +224,6,11.662432015712719,7 +224,7,12.037400281871959,7 +224,8,11.717186404253711,7 +224,9,13.409388719847216,7 +225,0,7.242581062113832,3 +225,1,7.745632432757251,3 +225,2,7.563026509584576,3 +225,3,10.090067005288025,3 +225,4,9.580717511217497,3 +225,5,10.066015570868716,3 +225,6,10.480893278088637,3 +225,7,9.851831171673238,3 +225,8,10.105943599987423,3 +225,9,10.382294198011103,3 +226,0,8.249032598154312,5 +226,1,8.852159823868078,5 +226,2,9.222838585218902,5 +226,3,9.063665844473238,5 +226,4,9.227752068801589,5 +226,5,11.45348947981416,5 +226,6,11.235568584051293,5 +226,7,12.008971598200889,5 +226,8,12.48142937766834,5 +226,9,11.631250119169728,5 +227,0,13.265055982292298,3 +227,1,13.00264828883612,3 +227,2,13.405200539072696,3 +227,3,16.105346632155072,3 +227,4,15.676786638062493,3 +227,5,14.635749597148603,3 +227,6,15.849454682276923,3 +227,7,16.33888873820795,3 +227,8,15.711658781554393,3 +227,9,16.42027426961236,3 +228,0,10.930009851219419,5 +228,1,10.632298042034254,5 +228,2,11.545652131211712,5 +228,3,10.696829261422236,5 +228,4,11.555182115348597,5 +228,5,14.418488753118144,5 +228,6,12.920186168922251,5 +228,7,13.335319223838443,5 +228,8,13.559051499214604,5 +228,9,14.143407691939913,5 +229,0,8.419280750019402,5 +229,1,9.40170733369055,5 +229,2,9.279313307388355,5 +229,3,9.149775615876807,5 +229,4,9.074954300014449,5 +229,5,11.298754031334921,5 +229,6,10.771147965185818,5 +229,7,11.730744319620813,5 +229,8,10.846117015662907,5 +229,9,11.919827639793622,5 +230,0,8.565636518178618,7 +230,1,8.177997568163246,7 +230,2,8.084458737638213,7 +230,3,8.47272022127404,7 +230,4,6.941806082068867,7 +230,5,8.623694376361934,7 +230,6,8.5158882681993,7 +230,7,10.647251210590706,7 +230,8,11.125831523568117,7 +230,9,10.835180806336838,7 +231,0,9.495460757426942,3 +231,1,10.094862850339831,3 +231,2,10.299341927792932,3 +231,3,12.423031141543028,3 +231,4,12.546523869395056,3 +231,5,13.085248708211799,3 +231,6,12.122546909235448,3 +231,7,12.853414413834946,3 +231,8,13.510529035061642,3 +231,9,12.098258437283924,3 +232,0,7.656703041627771,3 +232,1,7.3741904502512226,3 +232,2,7.581524509001192,3 +232,3,9.414475949630534,3 +232,4,9.882104343387521,3 +232,5,10.987507508451198,3 +232,6,9.534664520649137,3 +232,7,9.682074699998331,3 +232,8,9.715912416155993,3 +232,9,11.330588844551794,3 +233,0,8.426549592446193,5 +233,1,8.744899915032999,5 +233,2,8.656327974653717,5 +233,3,8.485508006005292,5 +233,4,8.94014412189962,5 +233,5,12.386841248075116,5 +233,6,11.953754905693964,5 +233,7,10.9312505363107,5 +233,8,11.78150649673768,5 +233,9,11.14195805887365,5 +234,0,9.735835123679378,7 +234,1,11.233774401934802,7 +234,2,11.404065366424263,7 +234,3,11.447045068626439,7 +234,4,10.789086318492702,7 +234,5,11.040631076101494,7 +234,6,11.598901719931549,7 +234,7,13.519766961776542,7 +234,8,13.227200207453967,7 +234,9,12.714483033043406,7 +235,0,12.367561669284399,7 +235,1,12.950128891831449,7 +235,2,11.911829927696314,7 +235,3,12.930003763051383,7 +235,4,12.87639491978024,7 +235,5,13.18569820382956,7 +235,6,13.346611822072896,7 +235,7,14.663391451250716,7 +235,8,14.302144372130922,7 +235,9,14.943864309371953,7 +236,0,11.662086603212382,3 +236,1,11.046520992685894,3 +236,2,11.070501282520057,3 +236,3,12.60684777348656,3 +236,4,14.49384672172435,3 +236,5,14.130379765161534,3 +236,6,14.20247093508626,3 +236,7,14.401385637838615,3 +236,8,14.305877125220581,3 +236,9,13.995358516859115,3 +237,0,5.3355389307426995,3 +237,1,5.303638800507737,3 +237,2,6.9347913600113005,3 +237,3,7.995376998453521,3 +237,4,7.585520762128083,3 +237,5,8.15016936988694,3 +237,6,7.511986523875642,3 +237,7,8.869674342098413,3 +237,8,7.88688643490023,3 +237,9,8.567963699115033,3 +238,0,11.141722272932087,7 +238,1,10.901117730163016,7 +238,2,10.85596845397034,7 +238,3,10.914134500488528,7 +238,4,10.980512909616968,7 +238,5,10.82038421359677,7 +238,6,10.983991743175668,7 +238,7,14.048528644885987,7 +238,8,13.354465141717942,7 +238,9,13.115835323369135,7 +239,0,9.885918348034402,3 +239,1,10.794252486813768,3 +239,2,10.681870463047394,3 +239,3,12.724231735581286,3 +239,4,12.41530654701026,3 +239,5,12.930748381551629,3 +239,6,12.422286583989832,3 +239,7,12.714260130792269,3 +239,8,13.42896939016103,3 +239,9,13.481205985924689,3 +240,0,10.336825647769984,7 +240,1,10.500017344291098,7 +240,2,11.08213164715209,7 +240,3,11.990846758080142,7 +240,4,10.26999798851089,7 +240,5,11.466947040530638,7 +240,6,10.739758904958812,7 +240,7,13.210615073982613,7 +240,8,13.445566361713265,7 +240,9,13.517526399026941,7 +241,0,12.96552139347281,3 +241,1,13.336022893490599,3 +241,2,13.164791579987124,3 +241,3,14.80119067087534,3 +241,4,16.0223541695638,3 +241,5,16.108855446876856,3 +241,6,16.254006518313748,3 +241,7,15.162052115170372,3 +241,8,16.18187843959449,3 +241,9,16.470408791205877,3 +242,0,5.357922412170722,7 +242,1,5.978110070481579,7 +242,2,6.015390123735331,7 +242,3,7.094598775674646,7 +242,4,7.235771464896802,7 +242,5,5.541345249725414,7 +242,6,7.720486949254509,7 +242,7,8.115910585813852,7 +242,8,9.163860093554227,7 +242,9,9.25877243682238,7 +243,0,8.319948036196886,3 +243,1,9.164427046537337,3 +243,2,8.964026277484756,3 +243,3,11.394958151933984,3 +243,4,11.333129300808102,3 +243,5,11.915962892302911,3 +243,6,11.45549292119082,3 +243,7,11.433856114904632,3 +243,8,12.16582526614638,3 +243,9,12.00799113204709,3 +244,0,11.547719342747154,5 +244,1,10.949519309457411,5 +244,2,10.859181254887472,5 +244,3,11.19795157396036,5 +244,4,11.271401952746416,5 +244,5,14.074912279913168,5 +244,6,14.269804670733736,5 +244,7,13.804247500820509,5 +244,8,14.697272154998887,5 +244,9,14.766047342469275,5 +245,0,6.958510659292385,5 +245,1,6.525865315320595,5 +245,2,7.824499525809886,5 +245,3,7.106302525577911,5 +245,4,7.299893799307603,5 +245,5,9.16700868828883,5 +245,6,8.991916037943517,5 +245,7,8.80249148593447,5 +245,8,9.95324536287076,5 +245,9,9.345110674143806,5 +246,0,14.48166371060543,3 +246,1,12.508861304800778,3 +246,2,12.523206858616332,3 +246,3,15.506967453913738,3 +246,4,16.116554835590666,3 +246,5,15.484192924578629,3 +246,6,15.838019306200653,3 +246,7,14.730124387377662,3 +246,8,15.413142323261134,3 +246,9,15.315090231538234,3 +247,0,9.789190300427377,5 +247,1,10.89520334241326,5 +247,2,9.8152994145961,5 +247,3,10.375615242357593,5 +247,4,10.868116328394608,5 +247,5,12.82107030742578,5 +247,6,13.53491747650485,5 +247,7,13.589503321352556,5 +247,8,13.983612598369046,5 +247,9,14.012905091757656,5 +248,0,12.263574366367498,5 +248,1,11.792208771682281,5 +248,2,12.044901422095142,5 +248,3,12.248924822646405,5 +248,4,12.16399632038232,5 +248,5,13.994570816728272,5 +248,6,14.126946704516923,5 +248,7,13.984388772521825,5 +248,8,14.703976386641724,5 +248,9,14.546160384644232,5 +249,0,8.526525801638028,7 +249,1,8.626990267294115,7 +249,2,8.452913313065016,7 +249,3,9.647533347575012,7 +249,4,9.297421511722305,7 +249,5,8.926712667169776,7 +249,6,9.86238655175742,7 +249,7,11.277888272059224,7 +249,8,10.954862363450296,7 +249,9,12.268723821565978,7 +250,0,11.358978885149364,7 +250,1,11.523077834828026,7 +250,2,11.848117390107008,7 +250,3,12.498821991327478,7 +250,4,12.037546025365577,7 +250,5,11.527968816859806,7 +250,6,12.49946588198851,7 +250,7,14.085032517801558,7 +250,8,15.371489365697155,7 +250,9,14.526967518933475,7 +251,0,11.019425363028745,5 +251,1,12.415625598055914,5 +251,2,13.30924200810819,5 +251,3,13.224903256250773,5 +251,4,13.45242025635636,5 +251,5,14.127063019287345,5 +251,6,15.193019309966466,5 +251,7,14.47594584210972,5 +251,8,15.04908781168654,5 +251,9,14.400466897608212,5 +252,0,10.143528157832979,5 +252,1,10.227632691592529,5 +252,2,11.115033680345645,5 +252,3,11.166115601109166,5 +252,4,11.004165904939748,5 +252,5,12.290545331246644,5 +252,6,12.909936323299478,5 +252,7,12.792697798802974,5 +252,8,13.700593918621584,5 +252,9,13.95484846668293,5 +253,0,10.996994578412835,5 +253,1,10.191192075688882,5 +253,2,9.299035224252652,5 +253,3,9.885320836332365,5 +253,4,11.579615274438497,5 +253,5,12.76817431573748,5 +253,6,12.693439476264835,5 +253,7,13.990131932115508,5 +253,8,13.901381758141557,5 +253,9,13.381026146357286,5 +254,0,10.393984933031463,5 +254,1,10.574635872156888,5 +254,2,10.854267877757183,5 +254,3,11.640710119223256,5 +254,4,11.151920190682064,5 +254,5,13.403700995083021,5 +254,6,13.725321533098537,5 +254,7,12.65584847805124,5 +254,8,13.718117424800473,5 +254,9,13.293402101232646,5 +255,0,7.015261872634854,7 +255,1,7.309224699528331,7 +255,2,7.964250664761893,7 +255,3,8.838921084817528,7 +255,4,8.730209253191386,7 +255,5,8.865492824109069,7 +255,6,9.848781211864583,7 +255,7,10.33876466309133,7 +255,8,11.059592793429376,7 +255,9,11.325527231928145,7 +256,0,9.336806081250126,3 +256,1,9.819106012060763,3 +256,2,9.10496797247712,3 +256,3,12.709895748156809,3 +256,4,11.764135363254077,3 +256,5,13.220642721908545,3 +256,6,12.288705477560566,3 +256,7,12.918969422218884,3 +256,8,12.478291205023718,3 +256,9,12.589570723389029,3 +257,0,9.402940268484278,3 +257,1,9.854516899778421,3 +257,2,10.053954282302305,3 +257,3,11.602825680637693,3 +257,4,12.032432727165341,3 +257,5,11.889905502066725,3 +257,6,13.444786969902701,3 +257,7,12.350089550139472,3 +257,8,13.267571010943563,3 +257,9,12.497143769691439,3 +258,0,10.896431549474444,5 +258,1,10.206105692884355,5 +258,2,10.743251432773057,5 +258,3,11.172055064101615,5 +258,4,11.959789644186925,5 +258,5,13.377779117545126,5 +258,6,14.05864790930349,5 +258,7,13.78715228380845,5 +258,8,14.380488398439335,5 +258,9,13.637266620054463,5 +259,0,13.194759849146491,3 +259,1,11.856950392363526,3 +259,2,11.816417576721753,3 +259,3,13.694622068976072,3 +259,4,15.565160829599623,3 +259,5,14.909372280129537,3 +259,6,14.432588127130433,3 +259,7,15.324159842257336,3 +259,8,15.14349458637948,3 +259,9,15.882712229508384,3 +260,0,7.109011419673227,3 +260,1,7.850247781886054,3 +260,2,7.851667144071769,3 +260,3,9.196086497838882,3 +260,4,10.813091438433029,3 +260,5,10.247729558141502,3 +260,6,10.852743438536463,3 +260,7,11.762813803207987,3 +260,8,10.506305990129379,3 +260,9,10.572351344714605,3 +261,0,9.809314024222843,3 +261,1,10.34360561585255,3 +261,2,9.66662542670577,3 +261,3,11.73450114079478,3 +261,4,12.678576574481529,3 +261,5,12.439301601279864,3 +261,6,11.327617807858434,3 +261,7,11.83728603962629,3 +261,8,13.300599871274452,3 +261,9,12.850946080900252,3 +262,0,12.581813129799322,7 +262,1,12.977706266700455,7 +262,2,13.16952943171878,7 +262,3,13.630777775458029,7 +262,4,12.683779397888552,7 +262,5,13.042436525982717,7 +262,6,13.69938897274964,7 +262,7,15.61299380507022,7 +262,8,16.0488406866363,7 +262,9,15.248157396638147,7 +263,0,8.691387880510533,7 +263,1,9.158615352063608,7 +263,2,8.956232815949512,7 +263,3,8.882379412255668,7 +263,4,8.864881741091887,7 +263,5,8.59539796598912,7 +263,6,9.076824246154978,7 +263,7,11.842676468815073,7 +263,8,11.25330928907481,7 +263,9,10.967921547700666,7 +264,0,8.109376421036666,7 +264,1,8.555925114057324,7 +264,2,8.368742966828934,7 +264,3,8.898151606950929,7 +264,4,9.016921845793874,7 +264,5,9.299991555621682,7 +264,6,9.667279701551875,7 +264,7,11.553429327605539,7 +264,8,10.68838043334538,7 +264,9,10.803732742131222,7 +265,0,10.366726645432475,5 +265,1,10.27902470504268,5 +265,2,10.035449020181915,5 +265,3,10.483766486575892,5 +265,4,10.567355916852868,5 +265,5,12.568688513781785,5 +265,6,12.846717640584885,5 +265,7,13.693837808821552,5 +265,8,13.304231404287007,5 +265,9,14.540516921690122,5 +266,0,11.774024473980464,7 +266,1,11.643737494002014,7 +266,2,11.890836822298072,7 +266,3,12.11701637019241,7 +266,4,12.306397359556172,7 +266,5,13.227126043900151,7 +266,6,11.866110148481855,7 +266,7,14.818293410059507,7 +266,8,14.592645679777702,7 +266,9,13.803221509685331,7 +267,0,10.4082015021687,3 +267,1,10.75666389152327,3 +267,2,9.794263909232603,3 +267,3,12.318385177175333,3 +267,4,11.560174980117468,3 +267,5,11.89344888127915,3 +267,6,13.146866899929945,3 +267,7,12.489643858117601,3 +267,8,12.2684506236523,3 +267,9,13.037582293350534,3 +268,0,12.396714965158468,7 +268,1,12.522510351986211,7 +268,2,13.22758419371143,7 +268,3,12.377054501406807,7 +268,4,12.605238574533063,7 +268,5,13.234961176948413,7 +268,6,13.742626076248081,7 +268,7,15.977001331618903,7 +268,8,15.125390320693167,7 +268,9,16.026609664333805,7 +269,0,11.321516818397848,5 +269,1,12.726692638586453,5 +269,2,11.346702452423187,5 +269,3,12.677827370528918,5 +269,4,11.78534491172369,5 +269,5,15.220211192530353,5 +269,6,14.609248329559295,5 +269,7,13.905579700428941,5 +269,8,14.252468725361801,5 +269,9,15.004178567744924,5 +270,0,12.209962342587904,7 +270,1,11.748282337917892,7 +270,2,11.905969274261857,7 +270,3,12.362816650102157,7 +270,4,12.149539347285534,7 +270,5,11.536887704110935,7 +270,6,11.877153121460033,7 +270,7,14.501200105355956,7 +270,8,15.09138134202897,7 +270,9,15.391631345861148,7 +271,0,11.49799771303391,3 +271,1,11.351878383166367,3 +271,2,11.682639945639425,3 +271,3,13.074857577544758,3 +271,4,13.43161752251908,3 +271,5,13.196167641978448,3 +271,6,13.08386169729433,3 +271,7,13.975007508466287,3 +271,8,14.000989655940558,3 +271,9,14.352059244902692,3 +272,0,14.928362116138393,5 +272,1,14.8766283363541,5 +272,2,15.133614533530947,5 +272,3,14.933964652578432,5 +272,4,15.26175189830337,5 +272,5,16.79223399934899,5 +272,6,17.440159163511733,5 +272,7,17.689515043335174,5 +272,8,17.12046548665607,5 +272,9,17.581222250188596,5 +273,0,9.27787536745323,7 +273,1,9.426423274009853,7 +273,2,9.227211944716833,7 +273,3,10.52256408485119,7 +273,4,10.522519209627935,7 +273,5,10.35839354938158,7 +273,6,11.175774598239817,7 +273,7,12.195053773983714,7 +273,8,13.905557829823229,7 +273,9,12.3759119145863,7 +274,0,6.1205520917449805,5 +274,1,5.894066458896868,5 +274,2,6.379118195897125,5 +274,3,6.710762879810094,5 +274,4,6.335997219340431,5 +274,5,9.381315339425784,5 +274,6,8.712530058183702,5 +274,7,8.828198108922072,5 +274,8,8.338456568610113,5 +274,9,9.102204953142165,5 +275,0,13.141495112629599,5 +275,1,13.004388858365637,5 +275,2,13.020271518960099,5 +275,3,14.844390412912624,5 +275,4,13.476770097944764,5 +275,5,14.980016012643647,5 +275,6,15.459397993167995,5 +275,7,15.882724803296174,5 +275,8,16.281842239342524,5 +275,9,16.38590698389694,5 +276,0,9.42290841397461,3 +276,1,8.862170984767683,3 +276,2,9.454821359837121,3 +276,3,10.94657140603106,3 +276,4,11.174779646579344,3 +276,5,11.125746650836218,3 +276,6,11.090406710787214,3 +276,7,12.316705882515352,3 +276,8,12.228906988575794,3 +276,9,12.472933823075413,3 +277,0,10.235417421698214,5 +277,1,10.704164738118639,5 +277,2,9.535116186364792,5 +277,3,10.947304647708744,5 +277,4,10.704320803688582,5 +277,5,13.029315688760649,5 +277,6,13.005772538210373,5 +277,7,12.422233311417623,5 +277,8,12.266741000990544,5 +277,9,12.759733805594585,5 +278,0,12.90156001035751,3 +278,1,13.064147077308752,3 +278,2,12.135130231030121,3 +278,3,14.428551685986948,3 +278,4,14.657483526750635,3 +278,5,14.940448839587647,3 +278,6,14.538075954294728,3 +278,7,14.608075486650987,3 +278,8,15.008183721517279,3 +278,9,15.562228287833557,3 +279,0,6.205183748005404,3 +279,1,6.837579162896266,3 +279,2,7.524757325953058,3 +279,3,8.444591049175393,3 +279,4,9.092258038676881,3 +279,5,9.807859284757535,3 +279,6,9.186346722700245,3 +279,7,9.20864607245294,3 +279,8,10.177228825035385,3 +279,9,9.542379878396376,3 +280,0,8.048953084390506,7 +280,1,7.81118826735132,7 +280,2,7.525283245332804,7 +280,3,8.036550837086487,7 +280,4,8.056930328710742,7 +280,5,8.507146982288958,7 +280,6,7.822532802855212,7 +280,7,10.21730860517853,7 +280,8,10.265947731985404,7 +280,9,10.748779642607614,7 +281,0,6.284854528540205,5 +281,1,7.40593264988122,5 +281,2,7.230608511847501,5 +281,3,6.438981588945819,5 +281,4,7.801575538502544,5 +281,5,10.414490666161159,5 +281,6,9.898361110536998,5 +281,7,10.210936509375527,5 +281,8,9.640129816563586,5 +281,9,8.866691665893036,5 +282,0,7.489667194365389,5 +282,1,8.639268441883031,5 +282,2,8.315738016084161,5 +282,3,8.254439280785476,5 +282,4,8.780354077964814,5 +282,5,11.219520063879797,5 +282,6,10.285348646568078,5 +282,7,10.887288609808959,5 +282,8,11.96666276150931,5 +282,9,10.774083229838466,5 +283,0,11.314153805616465,5 +283,1,10.736907831259488,5 +283,2,11.352170091958962,5 +283,3,11.72793696563542,5 +283,4,11.649914095526498,5 +283,5,13.833853013140924,5 +283,6,12.903476445330007,5 +283,7,14.136103065404635,5 +283,8,13.252926654716576,5 +283,9,13.909044094819755,5 +284,0,11.177498779124155,7 +284,1,11.390936965060332,7 +284,2,11.416854023655636,7 +284,3,10.92647069636318,7 +284,4,10.773441979477685,7 +284,5,11.984550025606426,7 +284,6,11.741292974384999,7 +284,7,14.32306410610445,7 +284,8,13.82600921853161,7 +284,9,13.245793505287063,7 +285,0,11.03561319915916,3 +285,1,10.467363053826553,3 +285,2,10.923550087367618,3 +285,3,12.528818644417349,3 +285,4,13.774699213773953,3 +285,5,12.614265946565508,3 +285,6,12.98383348160402,3 +285,7,13.306272710005764,3 +285,8,12.806457195113628,3 +285,9,13.617808122950905,3 +286,0,7.980423470949011,5 +286,1,7.029237151868411,5 +286,2,7.57203964173113,5 +286,3,6.0997505391959885,5 +286,4,7.555003631972847,5 +286,5,9.517947089252164,5 +286,6,9.365219431010074,5 +286,7,10.415193467029534,5 +286,8,9.811346391403351,5 +286,9,9.985729488668474,5 +287,0,5.191196689340177,3 +287,1,4.521385190852174,3 +287,2,5.0137999066962315,3 +287,3,8.16033986979482,3 +287,4,6.724817803294727,3 +287,5,7.496635451926859,3 +287,6,7.77993309858983,3 +287,7,8.951604060380495,3 +287,8,8.564095009482896,3 +287,9,8.001207863208164,3 +288,0,10.760964019338541,5 +288,1,9.838449260530204,5 +288,2,10.813257072647767,5 +288,3,9.729268547819258,5 +288,4,11.045336086675343,5 +288,5,12.40564034804527,5 +288,6,12.194455050310129,5 +288,7,12.700135364242607,5 +288,8,13.047379260640872,5 +288,9,12.76133295551791,5 +289,0,9.33342468572099,7 +289,1,8.864950990891295,7 +289,2,9.566634129689007,7 +289,3,9.577558963741629,7 +289,4,9.425986405369361,7 +289,5,9.335756776540354,7 +289,6,9.732218330605962,7 +289,7,12.031574804240178,7 +289,8,12.887347509089134,7 +289,9,12.42615814362242,7 +290,0,11.406766518341144,5 +290,1,12.563027640863364,5 +290,2,11.376750781817144,5 +290,3,11.461093299089075,5 +290,4,11.67077908059405,5 +290,5,12.914288554639096,5 +290,6,13.296820725223846,5 +290,7,13.365686910087055,5 +290,8,14.042578532043004,5 +290,9,13.731684262322624,5 +291,0,11.47301896517969,7 +291,1,12.047634562516771,7 +291,2,12.035482621794609,7 +291,3,11.623046750921917,7 +291,4,11.46890970400343,7 +291,5,12.471544618269135,7 +291,6,12.390852842897473,7 +291,7,13.300778843294848,7 +291,8,14.0473302829884,7 +291,9,14.386664983450922,7 +292,0,10.66589772267788,5 +292,1,10.831533611587249,5 +292,2,10.6184358647051,5 +292,3,10.869106629796793,5 +292,4,10.708551265306442,5 +292,5,12.439557020027083,5 +292,6,12.745850668596736,5 +292,7,12.985978457437417,5 +292,8,13.059143239108069,5 +292,9,12.233696411895629,5 +293,0,11.321524503286991,7 +293,1,11.648368997296283,7 +293,2,11.579581174798856,7 +293,3,11.696854272995916,7 +293,4,12.164261856154,7 +293,5,11.48078757722739,7 +293,6,11.306245063776101,7 +293,7,14.04618127533337,7 +293,8,14.702323373502603,7 +293,9,15.264042690495026,7 +294,0,10.849486867777506,5 +294,1,9.824290868295712,5 +294,2,10.54223912854175,5 +294,3,10.886739938770901,5 +294,4,10.947218306691934,5 +294,5,13.228075024701573,5 +294,6,12.599898274999756,5 +294,7,13.190894292005506,5 +294,8,13.468584163118202,5 +294,9,13.771258224881967,5 +295,0,10.657270327896537,3 +295,1,10.987550977581483,3 +295,2,9.908220571166263,3 +295,3,13.049398427414047,3 +295,4,13.197921964717413,3 +295,5,13.05033715711921,3 +295,6,13.742935486319258,3 +295,7,13.732156695180299,3 +295,8,12.730522472414128,3 +295,9,14.067883395853812,3 +296,0,8.126491408865723,7 +296,1,8.933497953855493,7 +296,2,8.149727850395903,7 +296,3,8.905656743525707,7 +296,4,8.849071809816984,7 +296,5,9.242204416735401,7 +296,6,9.253002113085909,7 +296,7,11.163071386749507,7 +296,8,10.622517317238115,7 +296,9,10.752049492529721,7 +297,0,9.348833702790973,7 +297,1,9.73663723037229,7 +297,2,9.536653198737524,7 +297,3,10.253383137626816,7 +297,4,9.901166712731241,7 +297,5,9.658270809947329,7 +297,6,10.101729025843127,7 +297,7,12.453707090495358,7 +297,8,12.603219531749376,7 +297,9,13.479966738333648,7 +298,0,10.059097523312204,3 +298,1,10.456812862518714,3 +298,2,10.806614166723627,3 +298,3,12.681840713713507,3 +298,4,12.543243120319588,3 +298,5,12.118923605141791,3 +298,6,12.738134913787405,3 +298,7,12.556700986955013,3 +298,8,12.330248852752025,3 +298,9,12.895712772478621,3 +299,0,11.117511090451611,7 +299,1,12.032950223481311,7 +299,2,12.141602869541984,7 +299,3,11.4891173060444,7 +299,4,11.910942635553278,7 +299,5,11.21193561977857,7 +299,6,11.99388697574349,7 +299,7,14.54054790118601,7 +299,8,14.853525895641889,7 +299,9,14.18453340998918,7 diff --git a/benchmark/data/py_edid_all_es.csv b/benchmark/data/py_edid_all_es.csv new file mode 100644 index 0000000..4388425 --- /dev/null +++ b/benchmark/data/py_edid_all_es.csv @@ -0,0 +1,14 @@ +relative_period,effect,se,t_stat,p_value,conf_int_lower,conf_int_upper +-6,0.08318115467870037,0.07042598515356402,1.1811145346042895,0.23755722670799762,-0.054851239798037646,0.2212135491554384 +-5,-0.18027469634605484,0.07090068263515993,-2.542636962661004,0.01100194731145611,-0.3192374807902727,-0.041311911901836945 +-4,0.07673914102122915,0.05109156509834282,1.501992371412365,0.13309907898937723,-0.02339848648530639,0.17687676852776468 +-3,-0.0448022993837105,0.05379653179463402,-0.8328101810492426,0.4049517919028923,-0.15024156419435708,0.0606369654269361 +-2,-0.06435585571434857,0.047200178189858125,-1.3634663720014657,0.17273557567278608,-0.15686650503034344,0.028154793601646314 +-1,0.05937116064504136,0.04846422156068495,1.2250513622858705,0.22055586902665947,-0.03561696815267071,0.15435928944275343 +0,2.0103176255952824,0.047169487778851556,42.61902599028452,0.0,1.917867128379531,2.1027681228110335 +1,1.9754390917933642,0.04822661834188152,40.96159257506617,0.0,1.8809166567471176,2.0699615268396108 +2,1.934393014168767,0.05015131931408062,38.57112914725778,0.0,1.836098234536001,2.032687793801533 +3,2.026305221834489,0.058705287392644655,34.516571025055065,4.525839693638938e-261,1.911244972842832,2.1413654708261456 +4,1.9819288254231706,0.06447298498373912,30.740453942423134,1.6403894045881596e-207,1.8555640968792502,2.108293553967091 +5,1.9481381350506901,0.09811150954176855,19.856366945626483,9.708459007056476e-88,1.7558431098799658,2.140433160221414 +6,1.9869499015124135,0.08277959760215918,24.00289393845262,2.5938781702542008e-127,1.8247048715574632,2.1491949314673637 diff --git a/benchmark/data/py_edid_all_grp.csv b/benchmark/data/py_edid_all_grp.csv new file mode 100644 index 0000000..ce90675 --- /dev/null +++ b/benchmark/data/py_edid_all_grp.csv @@ -0,0 +1,4 @@ +group,effect,se,t_stat,p_value,conf_int_lower,conf_int_upper +3,1.9960931694856245,0.04980915773469947,40.07482278896376,0.0,1.8984690142253389,2.09371732474591 +5,1.9929862922818466,0.04822810483725941,41.32416770277344,0.0,1.8984609437581963,2.0875116408054972 +7,1.9320859173820977,0.054057264882739126,35.74146641664489,8.977586288078467e-280,1.8261356251091871,2.038036209655008 diff --git a/benchmark/data/py_edid_all_gt.csv b/benchmark/data/py_edid_all_gt.csv new file mode 100644 index 0000000..f2a9bca --- /dev/null +++ b/benchmark/data/py_edid_all_gt.csv @@ -0,0 +1,28 @@ +group,time,effect,se,t_stat,p_value,conf_int_lower,conf_int_upper +3,1,0.07795192279276812,0.07679968825400386,1.015003114790693,0.3101043427398802,-0.07257270020898328,0.22847654579451954 +3,2,-0.058606297547268815,0.08370687201510864,-0.70013723050947,0.483841607029866,-0.2226687519553855,0.10545615686084786 +3,3,2.0594747157993987,0.09290998613800477,22.166344021839993,7.25709828326443e-109,1.8773744891647937,2.2415749424340037 +3,4,2.009774916608825,0.08188071164481647,24.545156926906795,4.8719494521915935e-133,1.8492916707564753,2.1702581624611748 +3,5,1.9971283575639818,0.09308539736199892,21.45479757471914,4.118996561616945e-102,1.8146843312478642,2.1795723838800996 +3,6,1.9307825430697905,0.0790520720078523,24.424186413203746,9.466475835578595e-132,1.775843329031133,2.085721757108448 +3,7,2.0404036167942734,0.09911236103419209,20.58677238140224,3.606054847861289e-94,1.8461469587445258,2.2346602748440207 +3,8,1.9481381350506901,0.09811150954176855,19.856366945626483,9.708459007056476e-88,1.7558431098799658,2.140433160221414 +3,9,1.9869499015124135,0.08277959760215918,24.00289393845262,2.5938781702542008e-127,1.8247048715574632,2.1491949314673637 +5,1,0.045927623768303026,0.06865291223736102,0.6689828919349053,0.5035063912807143,-0.0886296116507137,0.18048485918731977 +5,2,0.049178760187134705,0.07117869320765587,0.6909196835584095,0.48961601564769264,-0.09032891496649656,0.18868643534076598 +5,3,-0.16982880794995348,0.07939086262900734,-2.1391480370173293,0.032423681019514906,-0.32543203940437476,-0.014225576495532188 +5,4,0.06233127585825428,0.0774618351895922,0.8046707866615215,0.421009681792448,-0.08949113128972382,0.2141536830062324 +5,5,2.071383082134991,0.07750607118079841,26.72543002861125,2.3839347692448928e-157,1.919473974037428,2.2232921902325535 +5,6,1.9833230403202988,0.07973529176743112,24.873841888046016,1.4281067873006503e-136,1.8270447401593408,2.139601340481257 +5,7,1.8723529817814084,0.07839999842128952,23.882053819952258,4.7058671603251605e-126,1.7186918084876839,2.026014155075133 +5,8,2.102723364846247,0.08116192981485515,25.907754653480204,5.446102158728484e-148,1.9436489054933634,2.261797824199131 +5,9,1.935148992326288,0.08117456670565064,23.83935105367899,1.3059443328995341e-125,1.7760497651225684,2.0942482195300074 +7,1,0.08318115467870037,0.07042598515356402,1.1811145346042895,0.23755722670799762,-0.054851239798037646,0.2212135491554384 +7,2,-0.18027469634605484,0.07090068263515993,-2.542636962661004,0.01100194731145611,-0.3192374807902727,-0.041311911901836945 +7,3,0.10755065827415529,0.07161070916041619,1.5018795308007564,0.1331282238664489,-0.032803752587632976,0.24790506913594357 +7,4,-0.1387833589545557,0.07855796161868249,-1.7666364566357402,0.0772891313919952,-0.29275413442605325,0.01518741651694186 +7,5,-0.07272912628443702,0.08455982216191313,-0.8600908141123692,0.3897389844055471,-0.23846333226089866,0.09300507969202461 +7,6,0.15079301198567657,0.08724325972751962,1.7284201949427058,0.08391291743394275,-0.0202006349741356,0.32178665894548875 +7,7,1.90992649689228,0.07620066557148839,25.06443326404366,1.2153215813933964e-138,1.7605759367741816,2.0592770570103784 +7,8,1.9400864834140605,0.08763169484625276,22.139095755455664,1.3287560976773437e-108,1.7683315176112009,2.11184144921692 +7,9,1.9462447718399534,0.07888101124778762,24.673172174812095,2.0762139962476597e-134,1.7916408307301908,2.100848712949716 diff --git a/benchmark/data/py_edid_all_overall.csv b/benchmark/data/py_edid_all_overall.csv new file mode 100644 index 0000000..1c84749 --- /dev/null +++ b/benchmark/data/py_edid_all_overall.csv @@ -0,0 +1,2 @@ +att,se,ci_lower,ci_upper +1.9808316884319868,0.036637576850270276,1.9090233573246387,2.052640019539335 diff --git a/benchmark/edid_cov_sim.R b/benchmark/edid_cov_sim.R new file mode 100644 index 0000000..2a6f2b3 --- /dev/null +++ b/benchmark/edid_cov_sim.R @@ -0,0 +1,371 @@ +#!/usr/bin/env Rscript +# benchmark/edid_cov_sim.R +# ----------------------------------------------------------------------- +# Simulation study for the edid() covariate path. +# Evaluates bias, SE calibration, and coverage for the doubly-robust +# efficient DiD estimator of Chen, Sant'Anna & Xie (2025). +# +# Designs: +# DGP 1 (main): linear covariate, 2 cohorts + never-treated +# DGP 2: nonlinear covariate (quadratic), same structure +# DGP 3: propensity hard, outcome easy (misspec propensity) +# DGP 4: outcome hard, propensity easy (misspec outcome) +# DGP 5: 2D covariate, paper-style (mirrors Sim_10DGP.R DGPs 5-8) +# +# Sample sizes: n = 200, 500, 1000, 2000 +# Repetitions: R_fast = 200 per cell (increase R for publication) +# Cross-fitting stability: K-fold seed sweep at n=500 +# +# Usage: +# Rscript benchmark/edid_cov_sim.R +# # or from R: +# source("benchmark/edid_cov_sim.R") +# ----------------------------------------------------------------------- + +suppressPackageStartupMessages({ + if (!requireNamespace("did", quietly = TRUE)) devtools::load_all(".") + library(did) +}) + +set.seed(20240419) + +# ----------------------------------------------------------------------- +# Parameters +# ----------------------------------------------------------------------- +R_FAST <- 200 # replications per cell (increase for publication) +NS <- c(200, 500, 1000, 2000) +TRUE_ATT <- 1.0 # true ATT for all post-treatment cells in all DGPs +ALPHA <- 0.05 +N_PERIODS <- 6 +COHORTS <- c(3, 5) # two treatment cohorts + +# ----------------------------------------------------------------------- +# DGP functions +# ----------------------------------------------------------------------- + +#' DGP 1: linear covariate, moderate overlap +dgp_linear <- function(n, seed) { + set.seed(seed) + n_g <- floor(n / 3) + unit <- 1:n + g_unit <- c(rep(3, n_g), rep(5, n_g), rep(Inf, n - 2 * n_g)) + x1u <- rnorm(n, mean = g_unit / 10, sd = 1) # mild selection on X + T <- N_PERIODS + ids <- rep(unit, each = T) + times <- rep(1:T, times = n) + g_vec <- g_unit[ids] + x1 <- rep(x1u, each = T) + treated <- as.numeric(times >= g_vec) + y <- times + 0.5 * x1 + TRUE_ATT * treated + rnorm(n * T, sd = 0.6) + data.frame(id = ids, t = times, y = y, g = g_vec, x1 = x1) +} + +#' DGP 2: nonlinear covariate (quadratic), 2D +dgp_nonlinear <- function(n, seed) { + set.seed(seed) + n_g <- floor(n / 3) + g_unit <- c(rep(3, n_g), rep(5, n_g), rep(Inf, n - 2 * n_g)) + x1u <- rnorm(n, mean = g_unit / 8, sd = 1) + x2u <- rnorm(n) + T <- N_PERIODS + ids <- rep(1:n, each = T) + times <- rep(1:T, times = n) + g_vec <- g_unit[ids] + x1 <- rep(x1u, each = T) + x2 <- rep(x2u, each = T) + treated <- as.numeric(times >= g_vec) + y <- times + x1^2 + 0.3 * x2 + TRUE_ATT * treated + rnorm(n * T, sd = 0.6) + data.frame(id = ids, t = times, y = y, g = g_vec, x1 = x1, x2 = x2) +} + +#' DGP 3: propensity hard (strong selection), outcome easy +dgp_prop_hard <- function(n, seed) { + set.seed(seed) + n_g <- floor(n / 3) + g_unit <- c(rep(3, n_g), rep(5, n_g), rep(Inf, n - 2 * n_g)) + # Strong selection: x1 drives treatment strongly + x1u <- rnorm(n) + 0.8 * (g_unit < Inf) + T <- N_PERIODS + ids <- rep(1:n, each = T) + times <- rep(1:T, times = n) + g_vec <- g_unit[ids] + x1 <- rep(x1u, each = T) + treated <- as.numeric(times >= g_vec) + # Outcome: only linear in x1 (easy) + y <- times + 0.3 * x1 + TRUE_ATT * treated + rnorm(n * T, sd = 0.5) + data.frame(id = ids, t = times, y = y, g = g_vec, x1 = x1) +} + +#' DGP 4: outcome hard (nonlinear), propensity easy +dgp_outcome_hard <- function(n, seed) { + set.seed(seed) + n_g <- floor(n / 3) + g_unit <- c(rep(3, n_g), rep(5, n_g), rep(Inf, n - 2 * n_g)) + # Mild selection + x1u <- rnorm(n, mean = 0.2 * (g_unit < Inf), sd = 1) + x2u <- rnorm(n) + T <- N_PERIODS + ids <- rep(1:n, each = T) + times <- rep(1:T, times = n) + g_vec <- g_unit[ids] + x1 <- rep(x1u, each = T) + x2 <- rep(x2u, each = T) + treated <- as.numeric(times >= g_vec) + # Complex nonlinear outcome: hard to fit with linear basis + y <- times + sin(x1) * cos(x2) + 0.5 * x1^2 + TRUE_ATT * treated + + rnorm(n * T, sd = 0.6) + data.frame(id = ids, t = times, y = y, g = g_vec, x1 = x1, x2 = x2) +} + +#' DGP 5: 2D covariate, paper-style (mirrors Sim_10DGP.R DGPs 5-8) +dgp_paper_style <- function(n, seed) { + set.seed(seed) + n_g <- floor(n / 3) + g_unit <- c(rep(3, n_g), rep(5, n_g), rep(Inf, n - 2 * n_g)) + # X1, X2 ~ truncated N(0,1) on [-2, 2] + x1u <- pmin(pmax(rnorm(n, mean = 0.3 * (g_unit < Inf)), -2), 2) + x2u <- pmin(pmax(rnorm(n, mean = 0.3 * (g_unit < Inf)), -2), 2) + T <- N_PERIODS + ids <- rep(1:n, each = T) + times <- rep(1:T, times = n) + g_vec <- g_unit[ids] + x1 <- rep(x1u, each = T) + x2 <- rep(x2u, each = T) + treated <- as.numeric(times >= g_vec) + # Additive effects + y <- times + 0.5 * x1 + 0.4 * x2 + 0.2 * x1 * x2 + + TRUE_ATT * treated + rnorm(n * T, sd = 0.5) + data.frame(id = ids, t = times, y = y, g = g_vec, x1 = x1, x2 = x2) +} + +# ----------------------------------------------------------------------- +# Single-run function +# ----------------------------------------------------------------------- +run_edid_cov <- function(df, xformla, seed = 1L) { + tryCatch( + edid(df, "y", "id", "t", "g", xformla = xformla, + aggregate = "none", seed = seed), + error = function(e) NULL + ) +} + +# ----------------------------------------------------------------------- +# Monte Carlo loop +# ----------------------------------------------------------------------- +run_simulation <- function(dgp_fn, xformla, label, ns = NS, R = R_FAST) { + cat(sprintf("\n=== %s | xformla: %s ===\n", label, + deparse(xformla, width.cutoff = 60))) + + results_list <- vector("list", length(ns)) + + for (ni in seq_along(ns)) { + n <- ns[ni] + cat(sprintf(" n = %4d ...", n)); flush.console() + + cell_data <- list() # keyed by "g_t" + n_ok <- 0L + + for (r in seq_len(R)) { + df <- dgp_fn(n, seed = r) + fit <- run_edid_cov(df, xformla, seed = 1L) + if (is.null(fit)) next + n_ok <- n_ok + 1L + att_df <- fit$att_gt[!fit$att_gt$is_pre, ] + for (k in seq_len(nrow(att_df))) { + key <- paste0(att_df$group[k], "_", att_df$time[k]) + cell_data[[key]] <- c(cell_data[[key]], + list(list(att = att_df$att[k], se = att_df$se[k], + ci_l = att_df$ci_lower[k], + ci_u = att_df$ci_upper[k]))) + } + } + + cat(sprintf(" %d/%d OK\n", n_ok, R)) + + # Summarise per cell + cell_summary <- do.call(rbind, lapply(names(cell_data), function(key) { + draws <- cell_data[[key]] + atts <- sapply(draws, `[[`, "att") + ses <- sapply(draws, `[[`, "se") + ci_ls <- sapply(draws, `[[`, "ci_l") + ci_us <- sapply(draws, `[[`, "ci_u") + valid <- is.finite(atts) & is.finite(ses) + if (sum(valid) < 10L) return(NULL) + mc_mean <- mean(atts[valid]) + mc_bias <- mc_mean - TRUE_ATT + mc_sd <- sd(atts[valid]) + mc_rmse <- sqrt(mc_bias^2 + mc_sd^2) + mean_se <- mean(ses[valid]) + se_ratio <- mean_se / mc_sd + coverage <- mean(ci_ls[valid] <= TRUE_ATT & TRUE_ATT <= ci_us[valid]) + mean_ci_len <- mean(ci_us[valid] - ci_ls[valid]) + parts <- strsplit(key, "_")[[1L]] + data.frame( + n = n, label = label, group = as.numeric(parts[1]), + time = as.numeric(parts[2]), + R_ok = sum(valid), + mc_mean = mc_mean, bias = mc_bias, mc_sd = mc_sd, rmse = mc_rmse, + mean_se = mean_se, se_ratio = se_ratio, + coverage = coverage, mean_ci_len = mean_ci_len, + stringsAsFactors = FALSE + ) + })) + + results_list[[ni]] <- cell_summary + } + + do.call(rbind, results_list) +} + +# ----------------------------------------------------------------------- +# Run all DGPs +# ----------------------------------------------------------------------- +all_results <- list() + +all_results[["dgp1"]] <- run_simulation( + dgp_fn = dgp_linear, xformla = ~ x1, label = "DGP1: linear, 1D") + +all_results[["dgp2"]] <- run_simulation( + dgp_fn = dgp_nonlinear, xformla = ~ x1 + x2 + I(x1^2), label = "DGP2: nonlinear, 2D") + +all_results[["dgp3"]] <- run_simulation( + dgp_fn = dgp_prop_hard, xformla = ~ x1, label = "DGP3: propensity hard") + +all_results[["dgp4"]] <- run_simulation( + dgp_fn = dgp_outcome_hard, xformla = ~ x1 + x2, label = "DGP4: outcome hard, 2D") + +all_results[["dgp5"]] <- run_simulation( + dgp_fn = dgp_paper_style, xformla = ~ x1 + x2, label = "DGP5: paper-style, 2D") + +# ----------------------------------------------------------------------- +# Print results tables +# ----------------------------------------------------------------------- +fmt_table <- function(df, caption) { + cat(sprintf("\n%s\n", paste(rep("=", nchar(caption)), collapse = ""))) + cat(sprintf("%s\n", caption)) + cat(sprintf("%s\n", paste(rep("=", nchar(caption)), collapse = ""))) + if (is.null(df) || nrow(df) == 0L) { cat(" (no results)\n"); return(invisible()) } + for (g_val in sort(unique(df$group))) { + for (t_val in sort(unique(df$time[df$group == g_val]))) { + sub <- df[df$group == g_val & df$time == t_val, ] + if (nrow(sub) == 0L) next + cat(sprintf("\n ATT(%g,%g) | true ATT = %.2f\n", g_val, t_val, TRUE_ATT)) + cat(sprintf(" %6s %7s %7s %7s %7s %7s %8s %8s\n", + "n", "bias", "mc_sd", "mean_se", "se_ratio", "rmse", "coverage", "ci_len")) + for (k in seq_len(nrow(sub))) { + cat(sprintf(" %6d %+7.4f %7.4f %7.4f %7.3f %7.4f %8.3f %8.4f\n", + sub$n[k], sub$bias[k], sub$mc_sd[k], sub$mean_se[k], + sub$se_ratio[k], sub$rmse[k], sub$coverage[k], + sub$mean_ci_len[k])) + } + } + } +} + +for (key in names(all_results)) { + fmt_table(all_results[[key]], all_results[[key]]$label[1]) +} + +# ----------------------------------------------------------------------- +# Cross-fitting stability study +# ----------------------------------------------------------------------- +cat("\n\n=== Cross-fitting stability: ATT(3,3) across fold seeds, n=500 ===\n") +cat(sprintf(" %5s %7s %7s\n", "seed", "att(3,3)", "se(3,3)")) +df_stab <- dgp_linear(n = 500, seed = 42) +for (fold_seed in c(1L, 2L, 3L, 7L, 42L, 99L, 123L)) { + fit_s <- tryCatch( + edid(df_stab, "y", "id", "t", "g", xformla = ~ x1, + seed = fold_seed, aggregate = "none"), + error = function(e) NULL + ) + if (is.null(fit_s)) { + cat(sprintf(" %5d (error)\n", fold_seed)) + next + } + row <- fit_s$att_gt[fit_s$att_gt$group == 3 & fit_s$att_gt$time == 3, ] + if (nrow(row) == 0L) next + cat(sprintf(" %5d %7.4f %7.4f\n", fold_seed, row$att, row$se)) +} + +# ----------------------------------------------------------------------- +# EIF diagnostic +# ----------------------------------------------------------------------- +cat("\n\n=== EIF diagnostics: mean and variance per cell, n=500 ===\n") +df_eif <- dgp_linear(n = 500, seed = 1) +fit_eif <- edid(df_eif, "y", "id", "t", "g", xformla = ~ x1, + store_eif = TRUE, aggregate = "none", seed = 1L) +if (!is.null(fit_eif$eif)) { + eif_mat <- fit_eif$eif + n <- fit_eif$n + post <- fit_eif$att_gt[!fit_eif$att_gt$is_pre, ] + cat(sprintf(" %5s %5s %10s %10s %10s %10s\n", + "group", "time", "eif_mean", "eif_var", "se_eif", "se_reported")) + for (k in seq_len(nrow(post))) { + g_k <- post$group[k]; t_k <- post$time[k] + cell_id <- which(fit_eif$att_gt$group == g_k & fit_eif$att_gt$time == t_k) + if (length(cell_id) == 0L) next + eif_col <- eif_mat[, cell_id, drop = TRUE] + cat(sprintf(" %5g %5g %10.4e %10.4e %10.4f %10.4f\n", + g_k, t_k, mean(eif_col), var(eif_col), + sqrt(sum(eif_col^2) / n^2), post$se[k])) + } +} + +# ----------------------------------------------------------------------- +# Acceptance criteria check +# ----------------------------------------------------------------------- +cat("\n\n=== Acceptance criteria ===\n") +passed <- 0L; failed <- 0L + +for (key in names(all_results)) { + df_res <- all_results[[key]] + if (is.null(df_res) || nrow(df_res) == 0L) next + lbl <- df_res$label[1] + + # 1. Bias decreases with n + for (g_val in unique(df_res$group)) { + for (t_val in unique(df_res$time[df_res$group == g_val])) { + sub <- df_res[df_res$group == g_val & df_res$time == t_val & + df_res$n >= min(NS) & df_res$n <= max(NS), ] + if (nrow(sub) < 2L) next + large_bias <- abs(sub$bias[sub$n == max(NS)]) + small_bias <- abs(sub$bias[sub$n == min(NS)]) + # Allow some Monte Carlo noise: just require large-n bias < 0.2 + if (large_bias < 0.2) { + cat(sprintf(" PASS: bias decreasing/small for %s ATT(%g,%g): n=%d bias=%.3f\n", + lbl, g_val, t_val, max(NS), large_bias)) + passed <- passed + 1L + } else { + cat(sprintf(" WARN: large-n bias for %s ATT(%g,%g): %.3f (> 0.2)\n", + lbl, g_val, t_val, large_bias)) + failed <- failed + 1L + } + } + } + + # 2. SE ratio at largest n in (0.5, 2.0) + sub_large <- df_res[df_res$n == max(NS), ] + for (k in seq_len(nrow(sub_large))) { + r <- sub_large$se_ratio[k] + if (is.finite(r) && r > 0.5 && r < 2.0) { + cat(sprintf(" PASS: SE ratio for %s ATT(%g,%g) n=%d: %.3f\n", + lbl, sub_large$group[k], sub_large$time[k], max(NS), r)) + passed <- passed + 1L + } else { + cat(sprintf(" WARN: SE ratio for %s ATT(%g,%g) n=%d: %.3f (outside 0.5-2.0)\n", + lbl, sub_large$group[k], sub_large$time[k], max(NS), r)) + failed <- failed + 1L + } + } +} + +cat(sprintf("\n Total PASS: %d | Total WARN: %d\n", passed, failed)) + +# ----------------------------------------------------------------------- +# Save results +# ----------------------------------------------------------------------- +results_path <- file.path("benchmark", "edid_cov_sim_results.rds") +saveRDS(list(results = all_results, params = list(R = R_FAST, ns = NS, + true_att = TRUE_ATT, date = Sys.Date())), + results_path) +cat(sprintf("\nResults saved to: %s\n", results_path)) diff --git a/benchmark/edid_sim_original.R b/benchmark/edid_sim_original.R new file mode 100644 index 0000000..4cbddce --- /dev/null +++ b/benchmark/edid_sim_original.R @@ -0,0 +1,404 @@ +# Code to compute efficient DiD estimator with variation in treatment timing, +# in the simulations + +efficient_did_unc_stagg <- function( + data, + yname, + tname, + gname, + idname, + return_weights = FALSE +){ + #----------------------------------------------------------------------------- + df <- as.data.frame(data) + Y <- df[[yname]] + Time <- df[[tname]] + G <- df[[gname]] + sample_size = length(unique(df[[idname]])) + G_cs <- G[Time==min(Time)] + + # list of dates from smallest to largest + tlist <- unique(df[,tname])[order(unique(df[,tname]))] + glist <- unique(df[,gname])[order(unique(df[,gname]))] + #----------------------------------------------------------------------------- + g_treated <- glist[glist>0] + #----------------------------------------------------------------------------- + pi2 <- rep(0, length(glist)) + for (i in 1:length(glist)) { + pi2[i] = mean(G==glist[i]) + } + #----------------------------------------------------------------------------- + # Rule: (g',t''): if g'=g, then t_min <= t'' g and t_min < t'' + filter(tt>=g) |> + dplyr::mutate(keeper = ifelse((g == g_prime ) & (t_prime < g_prime), + 1, 0)) |> + dplyr::mutate(keeper = ifelse((g != g_prime ) & + ((min(t_prime) < t_prime)*(t_prime < g_prime))==1, + 1, keeper)) |> + filter(keeper == 1) + + #----------------------------------------------------------------------------- + # Function to compute influence function + IF_gprime_tprime <- function(g, + tt, + g_prime, + t_prime, + Y, + G, + Time){ + + # First, compute the average terms we will need for the influence function + mean_g_t <- mean(Y[G==g & Time==tt]) - + mean(Y[G==g & Time==min(Time)]) + + mean_inf_t_tprime <- mean(Y[G==0 & Time==tt]) - + mean(Y[G==0 & Time==t_prime]) + + mean_gprime_tprime <- mean(Y[G==g_prime & Time==t_prime]) - + mean(Y[G==g_prime & Time==min(Time)]) + + # Now we are ready to start collecting the IFs of each piece + IF_g <- ((G_cs == g)/pi2[which(glist == g)]) * + (Y[Time == tt] - Y[Time == min(Time)] - mean_g_t) + + IF_inf <- ((G_cs == 0)/pi2[which(glist == 0)]) * + (Y[Time == tt] - Y[Time == t_prime] - mean_inf_t_tprime) + + IF_g_prime <- ((G_cs == g_prime)/pi2[which(glist == g_prime)]) * + (Y[Time == t_prime] - Y[Time == min(Time)] - mean_gprime_tprime) + + # Combine the IFs + IFs <- IF_g - (IF_inf + IF_g_prime) + att <- mean_g_t - (mean_inf_t_tprime + mean_gprime_tprime) + return(list(att = att, IFs = IFs)) + } + #----------------------------------------------------------------------------- + # Compute all IFs + #----------------------------------------------------------------------------- + # Check how many pairs we have + n_pairs <- nrow(gt_gprime_tprime) + # We will populate this with att_gts + att_gt_list <- list() + # place holder in lists + counter <- 1 + for (k in 1:n_pairs){ + # Get the influence function + IFs <- IF_gprime_tprime( + g = gt_gprime_tprime$g[k], + tt = gt_gprime_tprime$tt[k], + g_prime = gt_gprime_tprime$g_prime[k], + t_prime = gt_gprime_tprime$t_prime[k], + Y = Y, + G = G, + Time = Time + ) + + att = IFs$att + inf_function = IFs$IFs + + att_gt_list[[counter]] <- + list( + group = gt_gprime_tprime$g[k], + tt = gt_gprime_tprime$tt[k], + g_prime = gt_gprime_tprime$g_prime[k], + t_prime = gt_gprime_tprime$t_prime[k], + att = att, + inf_function = inf_function + ) + + counter <- counter + 1 + } + + + # Process the att_gt_list + process_attgt <- function(att_gt_list) { + size_list <- length(att_gt_list) + # create vectors to hold the results + group <- c() + att <- c() + tt <- c() + g_prime <- c() + t_prime <- c() + inf_function <- matrix(0, nrow = sample_size, ncol = size_list) + + # populate result vectors and matrices + for (i in 1:size_list) { + group[i] <- att_gt_list[[i]]$group + tt[i] <- att_gt_list[[i]]$tt + g_prime[i] <- att_gt_list[[i]]$g_prime + t_prime[i] <- att_gt_list[[i]]$t_prime + inf_function[,i] <- att_gt_list[[i]]$inf_function + att[i] <- att_gt_list[[i]]$att + } + + list(group=group, + tt = tt, + g_prime = g_prime, + t_prime = t_prime, + inf_function = inf_function, + att=att) + } + att_gt_results <- process_attgt(att_gt_list) + + # Now compute the efficient IF for each group-time pair + unique_gt_pairs <- data.frame(g = att_gt_results$group, + tt = att_gt_results$tt) |> + unique()|> + dplyr::mutate(estimate = 0, + std_error = 0) + + #Length of unique g-t pairs + n_gt <- nrow(unique_gt_pairs) + eff_inf_function <- matrix(0, nrow = sample_size, ncol = n_gt) + weights_out <- matrix(0, nrow = n_pairs, ncol = 7) + for (k in 1:n_gt) { + + IF_g_t <- att_gt_results$inf_function[, + which(att_gt_results$group == unique_gt_pairs$g[k] & + att_gt_results$tt == unique_gt_pairs$tt[k])] + + theta <- att_gt_results$att[which(att_gt_results$group == unique_gt_pairs$g[k] & + att_gt_results$tt == unique_gt_pairs$tt[k])] + if(return_weights){ + # Get the g_prime associated with the g and tt + g_prime_g_t <- att_gt_results$g_prime[which(att_gt_results$group == unique_gt_pairs$g[k] & + att_gt_results$tt == unique_gt_pairs$tt[k])] + + t_prime_g_t <- att_gt_results$t_prime[which(att_gt_results$group == unique_gt_pairs$g[k] & + att_gt_results$tt == unique_gt_pairs$tt[k])] + } + #----------------------------------------------------------------------------- + # Compute Omega and Theta + #----------------------------------------------------------------------------- + Omega <- cov(IF_g_t) + Omega_inv <- solve(Omega) + w <- colSums(Omega_inv) / sum(Omega_inv) + unique_gt_pairs$estimate[k] <- sum(w*theta) + Asy_var <- 1/sum(Omega_inv) + unique_gt_pairs$std_error[k] <- sqrt(Asy_var/sample_size) + eff_inf_function[,k] <- IF_g_t %*% w + + weights = NULL + if(return_weights){ + weights <- cbind( + group = unique_gt_pairs$g[k], + time = unique_gt_pairs$tt[k], + e = unique_gt_pairs$tt[k]- unique_gt_pairs$g[k], + g_prime = g_prime_g_t, + t_prime = t_prime_g_t, + theta = theta, + w_eff = as.numeric(w)) + } + # rbind ther weights into weights_out + if(k==1){ + weights_out = weights + } + else{ + weights_out = rbind(weights_out, weights) + } + } + weights_out <- as.data.frame(weights_out) + + + #----------------------------------------------------------------------------- + return(list( + estimate = unique_gt_pairs$estimate, + std.error = unique_gt_pairs$std_error, + eff_inf_function = eff_inf_function, + group = unique_gt_pairs$g, + time = unique_gt_pairs$tt, + weights = weights_out, + return_weights = return_weights + )) + +} + +# Function to compute aggregated event-study + +es_efficient <- function(eff_did, + data, + yname, + tname, + gname, + idname +){ + + df <- as.data.frame(data) + # Reduce to cross-sections + df <- data[data[,tname]==eff_did$time[1], ] + + group <- eff_did$group + time <- eff_did$time + att <- eff_did$estimate + inf_func_attgt <- eff_did$eff_inf_function + return_weights <- eff_did$return_weights + weights = NULL + if(return_weights){ + weights <- as.data.frame(eff_did$weights) + weights[,"e"] <- weights[,"time"] - weights[,"group"] + } + glist<- unique(group) + # Probability of each group + pg <- sapply(glist, function(g) mean((df[,gname]==g))) + + # length of this is equal to number of groups + pgg <- pg + + # same but length is equal to the number of ATT(g,t) + pg <- pg[match(group, glist)] + + # n x 1 vector of group variable + G <- data.frame(df[,gname]) + # Event times + eseq <- unique(time - group) # compute atts that are specific to each event time + eseq <- eseq[order(eseq)] + + # --------------------------------------------------------------------- + # Some aux functions + wif <- function(keepers, + pg, + G, + group) { + # note: weights are all of the form P(G=g|cond)/sum_cond(P(G=g|cond)) + # this is equal to P(G=g)/sum_cond(P(G=g)) which simplifies things here + + # effect of estimating weights in the numerator + if1 <- sapply(keepers, function(k) { + ( 1*BMisc::TorF(G==group[k]) - pg[k]) / + sum(pg[keepers]) + }) + # effect of estimating weights in the denominator + if2 <- base::rowSums( sapply( keepers, function(k) { + 1*BMisc::TorF(G==group[k]) - pg[k] + })) %*% + t(pg[keepers]/(sum(pg[keepers])^2)) + + # return the influence function for the weights + if1 - if2 + } + + get_agg_inf_func <- function(att, + inffunc1, + whichones, + weights_agg, + wif=NULL) { + # enforce weights are in matrix form + weights_agg <- as.matrix(weights_agg) + + # multiplies influence function times weights and sums to get vector of weighted IF (of length n) + thisinffunc <- inffunc1[,whichones]%*%weights_agg + + # Incorporate influence function of the weights + if (!is.null(wif)) { + thisinffunc <- thisinffunc + wif%*%as.matrix(att[whichones]) + } + + # return influence function + return(thisinffunc) + } + #--------------------------------------------------------------------- + # Compute ES + # compute atts that are specific to each event time + es_att <- sapply(eseq, function(e) { + # keep att(g,t) for the right g&t as well as ones that + # are not trimmed out from balancing the sample + whiche <- which( (time - group == e)) + atte <- att[whiche] + pge <- pg[whiche]/(sum(pg[whiche])) + sum(atte*pge) + }) + + # Weights for each event time + weights_event_study = NULL + if (return_weights) { + weights_event_study <- sapply(eseq, function(e) { + + whiche <- which((weights$e ==e) ) + which_g <- weights$group[whiche] + # compute the ES-weights for each group + pgg_e <- pgg[match(which_g, glist)]/ + sum(pgg[match(unique(which_g), glist)]) + # compute the weights for each group + es_weights <- pgg_e * weights$w_eff[whiche] + weights_es <- cbind( + group = which_g, + time = weights$time[whiche], + e = e, + g_prime = weights$g_prime[whiche], + t_prime = weights$t_prime[whiche], + theta = weights$theta[whiche], + w_eff = weights$w_eff[whiche], + w_eff_es = as.numeric(es_weights) + ) + }) + + # rbind all list elements in weights_event_study + weights_event_study <- as.data.frame(do.call(rbind, weights_event_study)) + # Weights for the overall ES + weights_event_study$w_eff_overall <- weights_event_study$w_eff_es/length(unique(weights_event_study$e)) + } + + # compute standard errors for ES + es_se_inner <- lapply(eseq, function(e) { + whiche <- which( (time - group == e) ) + pge <- pg[whiche]/(sum(pg[whiche])) + + wif_e <- wif(whiche, pg, G, group) + + inf_func_e <- as.numeric(get_agg_inf_func(att = att, + inffunc1 = inf_func_attgt, + whichones = whiche, + weights_agg = pge, + wif = wif_e)) + se_e <- sqrt(mean(inf_func_e^2)/length(inf_func_e)) + list(inf_func = inf_func_e, se = se_e) + }) + + es_se_e <- unlist(BMisc::getListElement(es_se_inner, "se")) + es_se_e[es_se_e <= sqrt(.Machine$double.eps)*10] <- NA + + es_inf_func_e <- simplify2array(BMisc::getListElement(es_se_inner, "inf_func")) + + # get overall average treatment effect + # by averaging over positive dynamics + epos <- eseq >= 0 + overall_es <- mean(es_att[epos]) + overall_inf_func <- get_agg_inf_func(att = overall_es[epos], + inffunc1 = as.matrix(es_inf_func_e[,epos]), + whichones = (1:sum(epos)), + weights_agg = (rep(1/sum(epos), sum(epos))), + wif=NULL) + + overall_inf_func <- as.numeric(overall_inf_func) + overall_se <- sqrt(mean(overall_inf_func^2)/length(overall_inf_func)) + if(!is.na(overall_se)){ + if (overall_se <= sqrt(.Machine$double.eps)*10) overall_se <- NA + } + + return(list(es = es_att, + se = es_se_e, + inf_func = es_inf_func_e, + overall_es = overall_es, + overall_se = overall_se, + overall_inf_func = overall_inf_func, + eseq = eseq, + weights_es = weights_event_study, + return_weights = return_weights)) + + + +} \ No newline at end of file diff --git a/benchmark/generate_diffdiff_benchmark.py b/benchmark/generate_diffdiff_benchmark.py new file mode 100644 index 0000000..db9761d --- /dev/null +++ b/benchmark/generate_diffdiff_benchmark.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 +""" +generate_diffdiff_benchmark.py + +Generate the canonical diff-diff benchmark dataset and run EfficientDiD +(Chen-Sant'Anna-Xie 2025) via the diff-diff Python library. + +Outputs to benchmark/data/: + - diffdiff_panel.csv -- the raw panel data (unit, period, outcome, first_treat) + - py_edid_all_gt.csv -- ATT(g,t) group-time estimates [PT-All] + - py_edid_all_overall.csv -- overall ATT [PT-All] + - py_edid_all_es.csv -- event-study ATTs [PT-All] + - py_edid_all_grp.csv -- group-level ATTs [PT-All] + +Usage: + python3 benchmark/generate_diffdiff_benchmark.py + +Requires: diff-diff (pip install diff-diff) +""" + +import warnings +warnings.filterwarnings("ignore") + +import os +import pandas as pd +from diff_diff import generate_staggered_data, EfficientDiD + +# ── Output directory ────────────────────────────────────────────────────────── +script_dir = os.path.dirname(os.path.abspath(__file__)) +out_dir = os.path.join(script_dir, "data") +os.makedirs(out_dir, exist_ok=True) + +# ── 1. Generate canonical dataset ──────────────────────────────────────────── +print("Generating staggered panel: n_units=300, n_periods=10, ATT=2.0, seed=42 ...") +data = generate_staggered_data( + n_units=300, n_periods=10, treatment_effect=2.0, + dynamic_effects=False, seed=42 +) + +print(f" Shape : {data.shape}") +print(f" Cohorts : {sorted(data['first_treat'].unique())}") +print(f" Periods : {sorted(data['period'].unique())}") +print(f" Never-treat: first_treat == 0 ({(data['first_treat']==0).sum()//10} units)") + +# Save the 4 columns that edid() needs +panel_cols = ["unit", "period", "outcome", "first_treat"] +panel_path = os.path.join(out_dir, "diffdiff_panel.csv") +data[panel_cols].to_csv(panel_path, index=False) +print(f" Saved panel -> {panel_path}\n") + +# ── 2. EDiD PT-All: group-time ──────────────────────────────────────────────── +print("Running EfficientDiD(pt_assumption='all', aggregate='group_time') ...") +edid_gt = EfficientDiD(pt_assumption="all").fit( + data, outcome="outcome", unit="unit", time="period", + first_treat="first_treat", aggregate="all" +) +gt_df = edid_gt.to_dataframe(level="group_time") +gt_path = os.path.join(out_dir, "py_edid_all_gt.csv") +gt_df.to_csv(gt_path, index=False) +print(f" {len(gt_df)} (g,t) cells -> {gt_path}") + +# ── 3. EDiD PT-All: overall ATT ────────────────────────────────────────────── +overall_df = pd.DataFrame({ + "att" : [edid_gt.overall_att], + "se" : [edid_gt.overall_se], + "ci_lower" : [edid_gt.overall_conf_int[0]], + "ci_upper" : [edid_gt.overall_conf_int[1]], +}) +ov_path = os.path.join(out_dir, "py_edid_all_overall.csv") +overall_df.to_csv(ov_path, index=False) +print(f" Overall ATT = {edid_gt.overall_att:.4f} SE = {edid_gt.overall_se:.4f} -> {ov_path}") + +# ── 4. EDiD PT-All: event-study ────────────────────────────────────────────── +print("Running EfficientDiD(pt_assumption='all', aggregate='event_study') ...") +edid_es = EfficientDiD(pt_assumption="all").fit( + data, outcome="outcome", unit="unit", time="period", + first_treat="first_treat", aggregate="event_study" +) +es_df = edid_es.to_dataframe(level="event_study") +es_path = os.path.join(out_dir, "py_edid_all_es.csv") +es_df.to_csv(es_path, index=False) +print(f" {len(es_df)} relative-time points -> {es_path}") + +# ── 5. EDiD PT-All: group-level ────────────────────────────────────────────── +print("Running EfficientDiD(pt_assumption='all', aggregate='group') ...") +edid_grp = EfficientDiD(pt_assumption="all").fit( + data, outcome="outcome", unit="unit", time="period", + first_treat="first_treat", aggregate="group" +) +grp_df = edid_grp.to_dataframe(level="group") +grp_path = os.path.join(out_dir, "py_edid_all_grp.csv") +grp_df.to_csv(grp_path, index=False) +print(f" {len(grp_df)} groups -> {grp_path}") + +# ── Summary ─────────────────────────────────────────────────────────────────── +print("\n── Python EDiD results (summary) ──────────────────────────────────────") +print(f" Overall ATT = {edid_gt.overall_att:.4f} SE = {edid_gt.overall_se:.4f} " + f"CI = [{edid_gt.overall_conf_int[0]:.4f}, {edid_gt.overall_conf_int[1]:.4f}]") +print(f" True ATT = 2.0") +print() +print("── Group-time ATT(g,t) ──────────────────────────────────────────────────") +print(gt_df.to_string(index=False)) +print() +print("── Event-study ─────────────────────────────────────────────────────────") +print(es_df.to_string(index=False)) +print() +print("── Group-level ─────────────────────────────────────────────────────────") +print(grp_df.to_string(index=False)) +print() +print("Done.") diff --git a/man/aggregate_event_study_edid.Rd b/man/aggregate_event_study_edid.Rd new file mode 100644 index 0000000..e893224 --- /dev/null +++ b/man/aggregate_event_study_edid.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-aggregate.R +\name{aggregate_event_study_edid} +\alias{aggregate_event_study_edid} +\title{Aggregate cell-level ATTs by relative time (event study)} +\usage{ +aggregate_event_study_edid( + cells, + eif_matrix, + cell_index, + panel_obj, + alpha, + balance_e = NULL +) +} +\arguments{ +\item{cells}{list of \code{edid_cell_result} objects} + +\item{eif_matrix}{n x n_cells numeric matrix (or NULL)} + +\item{cell_index}{data.frame with columns \code{group}, \code{time}, +\code{cell_id}, \code{is_pre}} + +\item{panel_obj}{panel object} + +\item{alpha}{significance level} + +\item{balance_e}{integer or NULL: if not NULL, restrict to \code{[-balance_e, balance_e]}} +} +\value{ +named list, one entry per unique relative time; each entry is a list +with \code{e}, \code{att}, \code{se}, \code{ci_lower}, \code{ci_upper}, +\code{t_stat}, \code{p_value}, \code{eif_agg} +} +\description{ +For each unique relative time \eqn{e = t - g}, computes the cohort-share- +weighted average ATT over all \code{(g, t)} cells with \eqn{t - g = e}. +Includes WIF correction. +} +\keyword{internal} diff --git a/man/aggregate_group_edid.Rd b/man/aggregate_group_edid.Rd new file mode 100644 index 0000000..0596831 --- /dev/null +++ b/man/aggregate_group_edid.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-aggregate.R +\name{aggregate_group_edid} +\alias{aggregate_group_edid} +\title{Aggregate cell-level ATTs by treatment cohort} +\usage{ +aggregate_group_edid(cells, eif_matrix, cell_index, panel_obj, alpha) +} +\arguments{ +\item{cells}{list of \code{edid_cell_result} objects} + +\item{eif_matrix}{n x n_cells numeric matrix (or NULL)} + +\item{cell_index}{data.frame with columns \code{group}, \code{time}, +\code{cell_id}, \code{is_pre}} + +\item{panel_obj}{panel object} + +\item{alpha}{significance level} +} +\value{ +named list, one entry per cohort; each entry is a list with +\code{group}, \code{att}, \code{se}, \code{ci_lower}, \code{ci_upper}, +\code{t_stat}, \code{p_value}, \code{eif_agg} +} +\description{ +For each cohort \code{g}, computes the equal-time-weighted average ATT over +all post-treatment cells \code{(g, t)} with \code{t >= g}. +Group aggregation uses equal weights so there is no WIF correction. +} +\keyword{internal} diff --git a/man/aggregate_overall_edid.Rd b/man/aggregate_overall_edid.Rd new file mode 100644 index 0000000..1ea95fc --- /dev/null +++ b/man/aggregate_overall_edid.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-aggregate.R +\name{aggregate_overall_edid} +\alias{aggregate_overall_edid} +\title{Aggregate cell-level ATTs into an overall ATT} +\usage{ +aggregate_overall_edid(cells, eif_matrix, cell_index, panel_obj, alpha) +} +\arguments{ +\item{cells}{list of \code{edid_cell_result} objects (ordered by cell_id)} + +\item{eif_matrix}{n x n_cells numeric matrix (or NULL if EIF not stored)} + +\item{cell_index}{data.frame with columns \code{group}, \code{time}, +\code{cell_id}, \code{is_pre}} + +\item{panel_obj}{panel object} + +\item{alpha}{significance level} +} +\value{ +named list: \code{att}, \code{se}, \code{ci_lower}, \code{ci_upper}, +\code{t_stat}, \code{p_value}, \code{eif_agg} +} +\description{ +Uses cohort-share weights \eqn{q_k = \pi_{g_k}} over post-treatment cells, +normalized to sum to 1. Includes WIF correction for estimated weights. +} +\keyword{internal} diff --git a/man/aggte_edid.Rd b/man/aggte_edid.Rd new file mode 100644 index 0000000..d2fbc69 --- /dev/null +++ b/man/aggte_edid.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-aggte.R +\name{aggte_edid} +\alias{aggte_edid} +\title{Aggregate edid_fit estimates} +\usage{ +aggte_edid( + edid_fit_obj, + type = c("simple", "dynamic", "group", "calendar"), + balance_e = NULL, + min_e = -Inf, + max_e = Inf, + na.rm = FALSE +) +} +\arguments{ +\item{edid_fit_obj}{An \code{edid_fit} object returned by \code{edid()}.} + +\item{type}{Character scalar: aggregation type. One of +\code{"simple"} (overall ATT), \code{"dynamic"} (event-study), +\code{"group"} (cohort-level ATT), or \code{"calendar"} (not implemented).} + +\item{balance_e}{Integer or \code{NULL}: if not \code{NULL}, restricts the +dynamic aggregation to relative times in +\eqn{[-\text{balance\_e}, \text{balance\_e}]}.} + +\item{min_e}{Numeric: minimum relative time to include in dynamic output. +Default \code{-Inf}.} + +\item{max_e}{Numeric: maximum relative time to include in dynamic output. +Default \code{Inf}.} + +\item{na.rm}{Logical: whether to remove NA ATT entries before aggregating. +Default \code{FALSE}.} +} +\value{ +An S3 object of class \code{c("AGGTEobj_edid", "list")} with fields +matching \code{AGGTEobj} where possible: +\describe{ +\item{\code{att.egt}}{Vector of ATT estimates for each index.} +\item{\code{se.egt}}{Vector of standard errors.} +\item{\code{egt}}{Vector of indices (relative time, group, etc.).} +\item{\code{type}}{The aggregation type string.} +\item{\code{overall.att}}{Scalar overall ATT.} +\item{\code{overall.se}}{Scalar overall SE.} +\item{\code{alp}}{Significance level used.} +\item{\code{call}}{The matched call.} +} +} +\description{ +Provides the same user-facing interface as \code{\link[did]{aggte}} but +accepts an \code{edid_fit} object produced by \code{\link{edid}}. +} +\seealso{ +\code{\link{edid}}, \code{\link[did]{aggte}} +} diff --git a/man/as.data.frame.edid_fit.Rd b/man/as.data.frame.edid_fit.Rd new file mode 100644 index 0000000..a527343 --- /dev/null +++ b/man/as.data.frame.edid_fit.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-methods.R +\name{as.data.frame.edid_fit} +\alias{as.data.frame.edid_fit} +\title{Coerce edid_fit to a data.frame} +\usage{ +\method{as.data.frame}{edid_fit}( + x, + row.names = NULL, + optional = FALSE, + ..., + which = c("att_gt", "overall", "event_study", "group") +) +} +\arguments{ +\item{x}{an \code{edid_fit} object} + +\item{row.names}{ignored; included for S3 generic consistency} + +\item{optional}{ignored; included for S3 generic consistency} + +\item{...}{not used; reserved for \code{which} (see Details)} + +\item{which}{character: one of \code{"att_gt"}, \code{"overall"}, +\code{"event_study"}, \code{"group"}} +} +\value{ +data.frame +} +\description{ +Coerce edid_fit to a data.frame +} diff --git a/man/build_basis_matrix_edid.Rd b/man/build_basis_matrix_edid.Rd new file mode 100644 index 0000000..1eac156 --- /dev/null +++ b/man/build_basis_matrix_edid.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-cov.R +\name{build_basis_matrix_edid} +\alias{build_basis_matrix_edid} +\title{Build B-spline basis matrix for a covariate matrix} +\usage{ +build_basis_matrix_edid(X_mat, bs_df = 4L) +} +\arguments{ +\item{X_mat}{numeric matrix, n x d. May also be a numeric vector (treated +as n x 1).} + +\item{bs_df}{positive integer: degrees of freedom for the B-spline basis +(default 4)} +} +\value{ +numeric matrix n x p, with attribute \code{"bs_objects"}: a list of +length d, each element the fitted \code{bs} object for that column (used +by \code{predict_basis_edid()} to evaluate on new data). +} +\description{ +For the first covariate column, fits a B-spline basis with intercept +(\code{bs_df} columns). For each additional column, fits without intercept +(\code{bs_df - 1} columns, to avoid collinearity). Falls back to a linear +basis (intercept + raw column) if \code{splines::bs()} fails. +} +\keyword{internal} diff --git a/man/build_cluster_index.Rd b/man/build_cluster_index.Rd new file mode 100644 index 0000000..df46369 --- /dev/null +++ b/man/build_cluster_index.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-data.R +\name{build_cluster_index} +\alias{build_cluster_index} +\title{Build cluster integer index from cluster id column} +\usage{ +build_cluster_index(dt, idname, clustervars, all_units) +} +\arguments{ +\item{dt}{data.table (long format), sorted by unit then time} + +\item{idname}{character scalar: unit id column name} + +\item{clustervars}{character scalar: cluster id column name} + +\item{all_units}{sorted vector of unique unit ids} +} +\value{ +integer vector length n (values 1..G) +} +\description{ +Build cluster integer index from cluster id column +} +\keyword{internal} diff --git a/man/build_crossfit_folds_edid.Rd b/man/build_crossfit_folds_edid.Rd new file mode 100644 index 0000000..7af5dc8 --- /dev/null +++ b/man/build_crossfit_folds_edid.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-cov.R +\name{build_crossfit_folds_edid} +\alias{build_crossfit_folds_edid} +\title{Generate cross-fitting fold assignments} +\usage{ +build_crossfit_folds_edid(n, K = 5L, seed = NULL) +} +\arguments{ +\item{n}{positive integer: number of units} + +\item{K}{positive integer: number of folds (default 5)} + +\item{seed}{integer or NULL: if not NULL, set.seed() is called and restored} +} +\value{ +integer vector length \code{n}, values in \code{1:K} +} +\description{ +Assigns each of \code{n} units to one of \code{K} folds via simple +round-robin ordering (after optional random shuffling). +} +\keyword{internal} diff --git a/man/check_condition_edid.Rd b/man/check_condition_edid.Rd new file mode 100644 index 0000000..a1a7ed1 --- /dev/null +++ b/man/check_condition_edid.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-linalg.R +\name{check_condition_edid} +\alias{check_condition_edid} +\title{Condition number of a matrix via SVD} +\usage{ +check_condition_edid(mat) +} +\arguments{ +\item{mat}{numeric matrix} +} +\value{ +scalar: max singular value / min positive singular value. +Returns \code{Inf} if min singular value is 0. +} +\description{ +Condition number of a matrix via SVD +} +\keyword{internal} diff --git a/man/cluster_aggregate_edid.Rd b/man/cluster_aggregate_edid.Rd new file mode 100644 index 0000000..90c7f2e --- /dev/null +++ b/man/cluster_aggregate_edid.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-inference.R +\name{cluster_aggregate_edid} +\alias{cluster_aggregate_edid} +\title{Aggregate EIF to cluster level (centered)} +\usage{ +cluster_aggregate_edid(eif, cluster_indices) +} +\arguments{ +\item{eif}{numeric vector length n} + +\item{cluster_indices}{integer vector length n (values 1..G)} +} +\value{ +numeric vector length G (cluster sums, centered) +} +\description{ +Returns the vector of cluster sums of \code{eif}, mean-subtracted. +The small-sample correction \eqn{G/(G-1)} is applied in the SE formula +(in \code{safe_inference_edid}), not here. +} +\keyword{internal} diff --git a/man/coef.edid_fit.Rd b/man/coef.edid_fit.Rd new file mode 100644 index 0000000..e234804 --- /dev/null +++ b/man/coef.edid_fit.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-methods.R +\name{coef.edid_fit} +\alias{coef.edid_fit} +\title{Extract ATT coefficients from an edid_fit object} +\usage{ +\method{coef}{edid_fit}(object, which = c("att_gt", "overall", "event_study", "group"), ...) +} +\arguments{ +\item{object}{an \code{edid_fit} object} + +\item{which}{character: one of \code{"att_gt"}, \code{"overall"}, +\code{"event_study"}, \code{"group"}} + +\item{...}{additional arguments (ignored)} +} +\value{ +named numeric vector of ATT estimates +} +\description{ +Extract ATT coefficients from an edid_fit object +} diff --git a/man/compute_bootstrap_stats_edid.Rd b/man/compute_bootstrap_stats_edid.Rd new file mode 100644 index 0000000..afea400 --- /dev/null +++ b/man/compute_bootstrap_stats_edid.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-bootstrap.R +\name{compute_bootstrap_stats_edid} +\alias{compute_bootstrap_stats_edid} +\title{Compute bootstrap SE, CI, and p-value from a vector of bootstrap draws} +\usage{ +compute_bootstrap_stats_edid(boot_draws, att_hat, alpha = 0.05) +} +\arguments{ +\item{boot_draws}{numeric vector of length \code{n_bootstrap}} + +\item{att_hat}{scalar point estimate} + +\item{alpha}{significance level in (0, 1)} +} +\value{ +named list: \code{se_boot}, \code{ci_lower}, \code{ci_upper}, +\code{p_value_boot} +} +\description{ +Compute bootstrap SE, CI, and p-value from a vector of bootstrap draws +} +\keyword{internal} diff --git a/man/compute_efficient_weights_edid.Rd b/man/compute_efficient_weights_edid.Rd new file mode 100644 index 0000000..856f775 --- /dev/null +++ b/man/compute_efficient_weights_edid.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-nocov.R +\name{compute_efficient_weights_edid} +\alias{compute_efficient_weights_edid} +\title{Compute efficient inverse-covariance weights} +\usage{ +compute_efficient_weights_edid(omega_star) +} +\arguments{ +\item{omega_star}{numeric matrix H x H} +} +\value{ +numeric vector length H, summing to 1 +} +\description{ +Implements \eqn{w = (\Omega^{*-1} \mathbf{1}) / (\mathbf{1}' \Omega^{*-1} \mathbf{1})} +with fallback to uniform weights when the matrix is degenerate. +} +\keyword{internal} diff --git a/man/compute_eif_cov_edid.Rd b/man/compute_eif_cov_edid.Rd new file mode 100644 index 0000000..ee07226 --- /dev/null +++ b/man/compute_eif_cov_edid.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-cov-eif.R +\name{compute_eif_cov_edid} +\alias{compute_eif_cov_edid} +\title{Compute the efficient influence function for a cell with covariates} +\usage{ +compute_eif_cov_edid(panel_obj, gen_out_mat, weights, att_gt, g) +} +\arguments{ +\item{panel_obj}{panel object (needs n)} + +\item{gen_out_mat}{numeric matrix n x H (generated outcomes)} + +\item{weights}{numeric vector length H summing to 1} + +\item{att_gt}{scalar point estimate (= sum_j w_j * colMeans(gen_out_mat))} + +\item{g}{scalar: target treatment cohort (unused; kept for API compatibility)} +} +\value{ +numeric vector length n, mean approximately 0 +} +\description{ +The efficient GMM estimator is +\deqn{\hat\beta_{g,t} = \sum_j w_j \cdot \frac{1}{n}\sum_i \tilde{Y}_{j,i}} +where \eqn{\tilde{Y}_{j,i}} is the doubly-robust generated outcome for pair j +and \eqn{w_j} are the fixed efficient weights. By the delta method, its +influence function is +\deqn{EIF_i = \sum_j w_j \cdot (\tilde{Y}_{j,i} - \beta_j) + = \left(\sum_j w_j \tilde{Y}_{j,i}\right) - ATT(g,t)} +(using \eqn{\sum_j w_j \beta_j = ATT(g,t)}). +} +\details{ +Statistical note: an alternative form \eqn{EIF_i = \sum_j w_j \tilde{Y}_{j,i} ++ (G_{g,i}/\pi_g) \cdot ATT(g,t)} that appears in some semiparametric +efficiency calculations adds a term whose mean is \eqn{ATT(g,t)} (since +\eqn{E[G_{g,i}/\pi_g] = 1}). After centring, this equals +\eqn{(correct\,EIF) + (G_{g,i}/\pi_g - 1) \cdot ATT(g,t)}, inflating the +variance by \eqn{ATT^2 \cdot Var(G_{g,i}/\pi_g - 1) > 0} whenever +\eqn{ATT \ne 0}. The correct expression for the SE formula +\eqn{SE = \sqrt{\sum_i EIF_i^2 / n^2}} is the one below. +} +\keyword{internal} diff --git a/man/compute_eif_nocov_edid.Rd b/man/compute_eif_nocov_edid.Rd new file mode 100644 index 0000000..95ae750 --- /dev/null +++ b/man/compute_eif_nocov_edid.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-nocov.R +\name{compute_eif_nocov_edid} +\alias{compute_eif_nocov_edid} +\title{Compute the no-covariate efficient influence function for a (g, t) cell} +\usage{ +compute_eif_nocov_edid( + target_g, + target_t, + pairs, + weights, + panel_obj, + att_gt, + pt_assumption +) +} +\arguments{ +\item{target_g}{scalar cohort value} + +\item{target_t}{scalar time period} + +\item{pairs}{data.frame with columns \code{gp} and \code{tpre}; H rows} + +\item{weights}{numeric vector length H (efficient weights)} + +\item{panel_obj}{panel object from \code{prepare_edid_panel()}} + +\item{att_gt}{scalar ATT estimate for this cell} + +\item{pt_assumption}{\code{"all"} or \code{"post"}} +} +\value{ +numeric vector length n (zero-mean by construction) +} +\description{ +Compute the no-covariate efficient influence function for a (g, t) cell +} +\keyword{internal} diff --git a/man/compute_eif_se_edid.Rd b/man/compute_eif_se_edid.Rd new file mode 100644 index 0000000..bb98559 --- /dev/null +++ b/man/compute_eif_se_edid.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-inference.R +\name{compute_eif_se_edid} +\alias{compute_eif_se_edid} +\title{Compute SE from EIF vector} +\usage{ +compute_eif_se_edid(eif_vec, n) +} +\arguments{ +\item{eif_vec}{numeric vector (may be cluster-aggregated sums)} + +\item{n}{integer denominator (number of units or clusters)} +} +\value{ +scalar SE +} +\description{ +\deqn{SE = \sqrt{\sum_i \text{eif}_i^2 / n^2}} +} +\keyword{internal} diff --git a/man/compute_generated_outcomes_cov_edid.Rd b/man/compute_generated_outcomes_cov_edid.Rd new file mode 100644 index 0000000..4f9543a --- /dev/null +++ b/man/compute_generated_outcomes_cov_edid.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-cov-eif.R +\name{compute_generated_outcomes_cov_edid} +\alias{compute_generated_outcomes_cov_edid} +\title{Compute doubly-robust generated outcomes for a (g, t) cell} +\usage{ +compute_generated_outcomes_cov_edid( + panel_obj, + g, + t, + pairs, + prop_ratios, + cond_means, + pt_assumption +) +} +\arguments{ +\item{panel_obj}{panel object from \code{prepare_edid_panel()}} + +\item{g}{scalar: treatment cohort} + +\item{t}{scalar: target time period} + +\item{pairs}{data.frame with columns \code{gp} and \code{tpre}; H rows} + +\item{prop_ratios}{named list of n-vectors keyed by \code{as.character(gp)}: +cross-fitted propensity ratios. Must include key \code{"Inf"} for +\eqn{r_{g,\infty}} and keys for each cross-cohort gp.} + +\item{cond_means}{named list of n-vectors keyed by +\code{paste0(gp, "_", period)}: cross-fitted conditional means +\eqn{E[Y_{period} - Y_1 | G=gp, X]}. Must include never-treated keys.} + +\item{pt_assumption}{\code{"all"} or \code{"post"}} +} +\value{ +numeric matrix n x H; entries may be NA if nuisances are NA +} +\description{ +Returns the n x H matrix of generated outcomes where column j corresponds +to pair j = \eqn{(g'_j, t_{pre,j})} and row i to unit i. Implements +Eq. (4.4) of Chen, Sant'Anna & Xie (2025). +} +\details{ +For self-comparison pairs (gp == g), the formula reduces to Eq. (3.2): +\deqn{\tilde{Y} = (G_g/\pi_g - r_{g,\infty} G_\infty/\pi_g)(Y_t - Y_{tpre} - m_{\infty,t,tpre})} + +For cross-cohort pairs (gp != g), the three-term doubly-robust formula applies: +\deqn{\tilde{Y} = (G_g/\pi_g)(Y_t - Y_1 - m_{\infty,t,1}) + - r_{g,\infty} (G_\infty/\pi_g)(Y_t - Y_{tpre} - m_{\infty,t,tpre}) + - r_{g,g'} (G_{g'}/\pi_g)(Y_{tpre} - Y_1 - m_{g',tpre,1})} +Note: term1 uses only \eqn{m_{\infty,t,1}}, not \eqn{m_{g',tpre,1}}. Adding +\eqn{m_{g',tpre,1}} to term1 would bias the estimator by +\eqn{E[Y_{tpre}-Y_1|G=g]}, since the propensity-ratio correction in term3 +accounts for G=g' units only, not G=g units. +} +\keyword{internal} diff --git a/man/compute_generated_outcomes_nocov_edid.Rd b/man/compute_generated_outcomes_nocov_edid.Rd new file mode 100644 index 0000000..dc23439 --- /dev/null +++ b/man/compute_generated_outcomes_nocov_edid.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-nocov.R +\name{compute_generated_outcomes_nocov_edid} +\alias{compute_generated_outcomes_nocov_edid} +\title{Compute generated-outcome scalars for each valid pair} +\usage{ +compute_generated_outcomes_nocov_edid( + target_g, + target_t, + pairs, + panel_obj, + pt_assumption +) +} +\arguments{ +\item{target_g}{scalar cohort value} + +\item{target_t}{scalar time period} + +\item{pairs}{data.frame with columns \code{gp} and \code{tpre}; H rows} + +\item{panel_obj}{panel object from \code{prepare_edid_panel()}} + +\item{pt_assumption}{\code{"all"} or \code{"post"}} +} +\value{ +numeric vector length H +} +\description{ +Compute generated-outcome scalars for each valid pair +} +\keyword{internal} diff --git a/man/compute_omega_star_cov_edid.Rd b/man/compute_omega_star_cov_edid.Rd new file mode 100644 index 0000000..63231b8 --- /dev/null +++ b/man/compute_omega_star_cov_edid.Rd @@ -0,0 +1,48 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-cov-eif.R +\name{compute_omega_star_cov_edid} +\alias{compute_omega_star_cov_edid} +\title{Compute the averaged conditional covariance matrix Omega*(X)} +\usage{ +compute_omega_star_cov_edid( + panel_obj, + g, + t, + pairs, + prop_ratios, + cond_means, + bw = NULL +) +} +\arguments{ +\item{panel_obj}{panel object (needs \code{covariate_matrix}, \code{outcome_wide}, +\code{cohort_masks}, \code{never_treated_mask})} + +\item{g}{scalar: target treatment cohort} + +\item{t}{scalar: target time period} + +\item{pairs}{data.frame with columns \code{gp} and \code{tpre}; H rows} + +\item{prop_ratios}{named list of n-vectors: cross-fitted propensity ratios} + +\item{cond_means}{named list of n-vectors: cross-fitted conditional means} + +\item{bw}{numeric vector length d or NULL (auto from \code{bw.nrd0})} +} +\value{ +numeric matrix H x H (positive semi-definite) +} +\description{ +Estimates \eqn{\Omega^* = n^{-1} \sum_i \hat\Omega^*(X_i)} using a faithful +plug-in of Eq. (3.12) from Chen, Sant'Anna & Xie (2025). +} +\details{ +Each (j,k)-th element of Omega*(X) is estimated using Nadaraya-Watson +kernel smoothing of outcome change covariances within specific cohorts, +scaled by propensity scores. + +\strong{Computational complexity}: O(n^2 * H^2). Emits a warning for +n > 1000. +} +\keyword{internal} diff --git a/man/compute_omega_star_nocov_edid.Rd b/man/compute_omega_star_nocov_edid.Rd new file mode 100644 index 0000000..d87dfbc --- /dev/null +++ b/man/compute_omega_star_nocov_edid.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-nocov.R +\name{compute_omega_star_nocov_edid} +\alias{compute_omega_star_nocov_edid} +\title{Compute the Omega* covariance matrix for the no-covariate EDiD path} +\usage{ +compute_omega_star_nocov_edid( + target_g, + target_t, + pairs, + panel_obj, + pt_assumption +) +} +\arguments{ +\item{target_g}{scalar cohort value} + +\item{target_t}{scalar time period} + +\item{pairs}{data.frame with columns \code{gp} and \code{tpre}; H rows} + +\item{panel_obj}{panel object from \code{prepare_edid_panel()}} + +\item{pt_assumption}{\code{"all"} or \code{"post"}} +} +\value{ +numeric matrix H x H +} +\description{ +Builds the \eqn{H \times H} sample covariance matrix of the identifying +moments for cell \code{(target_g, target_t)}. +} +\keyword{internal} diff --git a/man/compute_pseudoinverse_edid.Rd b/man/compute_pseudoinverse_edid.Rd new file mode 100644 index 0000000..0f7d899 --- /dev/null +++ b/man/compute_pseudoinverse_edid.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-linalg.R +\name{compute_pseudoinverse_edid} +\alias{compute_pseudoinverse_edid} +\title{SVD-based Moore-Penrose pseudoinverse} +\usage{ +compute_pseudoinverse_edid(mat, tol = NULL) +} +\arguments{ +\item{mat}{numeric matrix} + +\item{tol}{tolerance for zero singular values; defaults to +\code{max(dim(mat)) * max(svd$d) * .Machine$double.eps}} +} +\value{ +matrix of same dimensions as \code{t(mat)} +} +\description{ +SVD-based Moore-Penrose pseudoinverse +} +\keyword{internal} diff --git a/man/compute_wif_contribution_edid.Rd b/man/compute_wif_contribution_edid.Rd new file mode 100644 index 0000000..d9203bb --- /dev/null +++ b/man/compute_wif_contribution_edid.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-aggregate.R +\name{compute_wif_contribution_edid} +\alias{compute_wif_contribution_edid} +\title{Compute WIF (weight influence function) correction for aggregated EIF} +\usage{ +compute_wif_contribution_edid( + weight_fn, + cells, + eif_matrix, + cell_index, + panel_obj, + agg_att +) +} +\arguments{ +\item{weight_fn}{function(cells, cell_index, panel_obj) that returns a named +numeric vector of normalized weights \eqn{q_k} for all post-treatment cells; +names are cell_ids (character)} + +\item{cells}{list of \code{edid_cell_result} objects} + +\item{eif_matrix}{n x n_cells numeric matrix (or NULL)} + +\item{cell_index}{data.frame with columns \code{group}, \code{time}, +\code{cell_id}, \code{is_pre}} + +\item{panel_obj}{panel object from \code{prepare_edid_panel()}} + +\item{agg_att}{scalar: the already-computed aggregated ATT (used for WIF)} +} +\value{ +numeric vector length n: WIF correction to add to aggregated EIF +} +\description{ +Corrects the aggregated EIF for estimation error in the cohort-share weights +\eqn{\pi_g = n_g / n}. The correction accounts for the fact that \eqn{\pi_g} +is estimated from data and therefore contributes to the variance of the +aggregated estimator. +} +\keyword{internal} diff --git a/man/cov_nn_edid.Rd b/man/cov_nn_edid.Rd new file mode 100644 index 0000000..0a558a2 --- /dev/null +++ b/man/cov_nn_edid.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-utils.R +\name{cov_nn_edid} +\alias{cov_nn_edid} +\title{Biased sample covariance (divide by n, not n-1)} +\usage{ +cov_nn_edid(x, y) +} +\arguments{ +\item{x}{numeric vector} + +\item{y}{numeric vector, same length as x} +} +\value{ +scalar +} +\description{ +Biased sample covariance (divide by n, not n-1) +} +\keyword{internal} diff --git a/man/dot-check_extreme_ratios_edid.Rd b/man/dot-check_extreme_ratios_edid.Rd new file mode 100644 index 0000000..0cc83a4 --- /dev/null +++ b/man/dot-check_extreme_ratios_edid.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-cov.R +\name{.check_extreme_ratios_edid} +\alias{.check_extreme_ratios_edid} +\title{Check for extreme propensity ratios and warn once} +\usage{ +.check_extreme_ratios_edid(r_vec, g, gp) +} +\description{ +Check for extreme propensity ratios and warn once +} +\keyword{internal} diff --git a/man/edid.Rd b/man/edid.Rd new file mode 100644 index 0000000..c327a2d --- /dev/null +++ b/man/edid.Rd @@ -0,0 +1,173 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid.R +\name{edid} +\alias{edid} +\title{Efficient Difference-in-Differences Estimator} +\usage{ +edid( + data, + yname, + idname, + tname, + gname, + xformla = NULL, + covariates = NULL, + pt_assumption = c("all", "post"), + alp = 0.05, + clustervars = NULL, + control_group = c("nevertreated", "notyettreated"), + bstrap = FALSE, + biters = 1000L, + bootstrap_weights = c("rademacher", "mammen", "webb"), + seed = NULL, + anticipation = 0L, + aggregate = c("all", "overall", "event_study", "group", "none"), + balance_e = NULL, + survey_design = NULL, + store_eif = FALSE +) +} +\arguments{ +\item{data}{A \code{data.frame}, \code{data.table}, or tibble in long format +(one row per unit-time observation).} + +\item{yname}{Character scalar: name of the outcome column (must be numeric +with no missing or non-finite values).} + +\item{idname}{Character scalar: name of the unit identifier column.} + +\item{tname}{Character scalar: name of the time period column (numeric).} + +\item{gname}{Character scalar: name of the column recording each unit's +first treatment period. Never-treated units should have \code{Inf} or +\code{0} (the \code{att_gt()} convention). \code{0} is automatically +converted to \code{Inf} internally.} + +\item{xformla}{A one-sided formula specifying covariates to condition on, +e.g., \code{~ X1 + X2}. Default \code{NULL} (equivalent to \code{~1}, +no covariates). When \code{NULL} or \code{~1}, the efficient no-covariate +path is used. \strong{Note}: The \code{covariates} argument is deprecated +and will error if non-NULL; use \code{xformla} instead.} + +\item{covariates}{Character vector of covariate column names, or \code{NULL} +(default). \strong{Currently not implemented}: passing non-NULL triggers an +error.} + +\item{pt_assumption}{Parallel-trends assumption regime. One of: +\describe{ +\item{\code{"all"}}{PT-All: parallel trends holds for all pre-treatment +periods (default). Uses all valid \eqn{(g', t_{pre})} pairs.} +\item{\code{"post"}}{PT-Post: parallel trends holds only for the period +immediately before treatment. Each cell uses a single DiD moment.} +}} + +\item{alp}{Significance level for confidence intervals. Default \code{0.05}.} + +\item{clustervars}{Character scalar naming a time-invariant cluster variable +in \code{data}, or \code{NULL} for no clustering (default). When supplied, +cluster-robust standard errors are computed via the sandwich EIF formula. +Note: edid() currently supports only a single cluster variable internally.} + +\item{control_group}{Control group definition. One of: +\describe{ +\item{\code{"nevertreated"}}{Use never-treated units (default).} +\item{\code{"notyettreated"}}{Use the last-treated cohort as +pseudo-controls (relabeled as never-treated internally).} +}} + +\item{bstrap}{Logical: whether to use multiplier bootstrap inference. +Default \code{FALSE} (analytical standard errors). When \code{TRUE}, +\code{biters} bootstrap draws are used.} + +\item{biters}{Positive integer: number of multiplier bootstrap iterations. +Default \code{1000L}. Only used when \code{bstrap = TRUE}.} + +\item{bootstrap_weights}{Distribution for multiplier weights. One of +\code{"rademacher"} (default), \code{"mammen"}, or \code{"webb"}.} + +\item{seed}{Integer seed for reproducibility of bootstrap draws, or +\code{NULL} (default, no seed set).} + +\item{anticipation}{Non-negative integer: number of anticipation periods. +Default \code{0L}. The effective treatment start for cohort \eqn{g} is +\eqn{g - \text{anticipation}}.} + +\item{aggregate}{Which aggregations to compute. One or more of +\code{"all"} (default), \code{"overall"}, \code{"event_study"}, +\code{"group"}, or \code{"none"}.} + +\item{balance_e}{Integer or \code{NULL}: if not \code{NULL}, restricts the +event-study aggregation to relative times in +\eqn{[-\text{balance\_e}, \text{balance\_e}]}.} + +\item{survey_design}{Always \code{NULL}. Survey designs are not yet +implemented; passing a non-NULL value triggers an error.} + +\item{store_eif}{Logical: if \code{TRUE}, store the full \eqn{n \times K} +EIF matrix in \code{edid_fit$eif}. Default \code{FALSE}. The EIF is +always computed internally when \code{bstrap = TRUE}.} +} +\value{ +An object of class \code{edid_fit} (a list) with elements: +\describe{ +\item{\code{call}}{The matched call.} +\item{\code{att_gt}}{data.frame of cell-level estimates (group, time, +att, se, ci_lower, ci_upper, t_stat, p_value, is_pre).} +\item{\code{overall}}{List: overall ATT with SE and CI.} +\item{\code{event_study}}{List of per-relative-time ATTs.} +\item{\code{group}}{List of per-cohort ATTs.} +\item{\code{eif}}{EIF matrix or \code{NULL}.} +\item{\code{bootstrap}}{Bootstrap results or \code{NULL}.} +\item{\code{bstrap}}{Logical: whether bootstrap inference was used.} +} +} +\description{ +Estimates group-time average treatment effects \eqn{ATT(g, t)} for staggered +adoption designs using the Efficient DiD (EDiD) estimator of Chen, Sant'Anna +& Xie (2025). The estimator combines all valid DiD identifying moments for +each \eqn{(g, t)} cell with optimal inverse-covariance weights to achieve +minimum asymptotic variance. +} +\examples{ +# Simulate a simple balanced panel with staggered adoption +set.seed(42) +n_units <- 100 +n_periods <- 6 +unit_ids <- rep(1:n_units, each = n_periods) +time_ids <- rep(1:n_periods, times = n_units) +# Assign cohorts: 1/3 treated in period 3, 1/3 in period 5, 1/3 never +cohort_assign <- rep( + c(3, 5, Inf), + times = c(ceiling(n_units / 3), + ceiling(n_units / 3), + n_units - 2 * ceiling(n_units / 3)) +)[1:n_units] +first_treat_vec <- cohort_assign[unit_ids] +# Generate outcomes: ATT = 1 for treated post-treatment +treat_effect <- as.numeric(time_ids >= first_treat_vec) +y_vals <- 0.5 * time_ids + treat_effect + rnorm(n_units * n_periods, sd = 0.5) +panel_df <- data.frame( + id = unit_ids, + period = time_ids, + y = y_vals, + first_treat = first_treat_vec +) +# Fit EDiD (no-covariate, PT-All, analytical SE) +fit <- edid( + data = panel_df, + yname = "y", + idname = "id", + tname = "period", + gname = "first_treat", + pt_assumption = "all" +) +# View overall ATT +fit$overall$att +# Extract cell-level estimates +head(fit$att_gt) + +} +\references{ +Chen, L., Sant'Anna, P. H. C., & Xie, Y. (2025). +\emph{Efficient Difference-in-Differences}. Working paper. +} diff --git a/man/enumerate_valid_pairs_edid.Rd b/man/enumerate_valid_pairs_edid.Rd new file mode 100644 index 0000000..3ff7fd9 --- /dev/null +++ b/man/enumerate_valid_pairs_edid.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-pairs.R +\name{enumerate_valid_pairs_edid} +\alias{enumerate_valid_pairs_edid} +\title{Enumerate valid comparison pairs for a target (g, t) cell} +\usage{ +enumerate_valid_pairs_edid( + target_g, + treatment_groups, + time_periods, + period_1, + pt_assumption, + anticipation = 0L, + never_treated_val = Inf +) +} +\arguments{ +\item{target_g}{scalar: treatment cohort being estimated} + +\item{treatment_groups}{sorted numeric vector of all finite cohort values} + +\item{time_periods}{sorted numeric vector of all time periods in the panel} + +\item{period_1}{scalar: universal first period} + +\item{pt_assumption}{character: \code{"all"} or \code{"post"}} + +\item{anticipation}{integer >= 0} + +\item{never_treated_val}{value used to represent the never-treated cohort +(default \code{Inf})} +} +\value{ +data.frame with columns \code{gp} (comparison cohort) and +\code{tpre} (pre-period). May have 0 rows. +} +\description{ +Constructs the set \eqn{H_{gt}} of valid \code{(gp, tpre)} pairs used to +form identifying DiD moments for cohort \code{target_g} at time \code{target_t}. +} +\details{ +Under \strong{PT-Post}: returns exactly one pair \code{(Inf, target_g - 1 - anticipation)}, +or a 0-row data.frame if that pre-period does not exist in \code{time_periods} or +equals \code{period_1}. + +Under \strong{PT-All}: iterates over treated cohorts \code{gp} only (the +never-treated group is the time control inside every moment, not a comparison +cohort). For \code{gp == target_g}: valid \code{tpre} are all periods strictly +less than \code{gp - anticipation}, including \code{period_1} (this is the +degenerate CS DiD moment whose comparison-cohort EIF term is identically zero). +For \code{gp != target_g}: valid \code{tpre} are periods strictly between +\code{period_1} and \code{gp - anticipation} (exclusive on both ends). +Returns a 0-row data.frame if no valid pairs exist (e.g., single cohort with +only one pre-period equal to \code{period_1}). +} +\keyword{internal} diff --git a/man/estimate_all_conditional_means.Rd b/man/estimate_all_conditional_means.Rd new file mode 100644 index 0000000..b4fadcc --- /dev/null +++ b/man/estimate_all_conditional_means.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-cov.R +\name{estimate_all_conditional_means} +\alias{estimate_all_conditional_means} +\title{Estimate conditional means for all (g', period) combinations via cross-fitting} +\usage{ +estimate_all_conditional_means( + panel_obj, + pairs, + t_val, + bs_df, + K_folds, + fold_id +) +} +\arguments{ +\item{panel_obj}{panel object with \code{covariate_matrix}, \code{unit_cohorts}, +\code{outcome_wide}, and \code{period_to_col}} + +\item{pairs}{data.frame with columns \code{gp} and \code{tpre}} + +\item{t_val}{scalar: target time period for this cell} + +\item{bs_df}{integer: B-spline df} + +\item{K_folds}{integer: number of cross-fitting folds} + +\item{fold_id}{integer vector length n: pre-generated fold assignments} +} +\value{ +named list of n-vectors, keyed by \code{paste0(gp, "_", period)} +} +\description{ +For each unique (gp, period) pair needed by the cell, performs K-fold +cross-fitting to produce a full-sample n-vector of +\eqn{\hat m_{g', \text{period}, 1}(X_i)}. +} +\keyword{internal} diff --git a/man/estimate_all_propensity_ratios.Rd b/man/estimate_all_propensity_ratios.Rd new file mode 100644 index 0000000..fa1cb46 --- /dev/null +++ b/man/estimate_all_propensity_ratios.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-cov.R +\name{estimate_all_propensity_ratios} +\alias{estimate_all_propensity_ratios} +\title{Estimate propensity ratios for all comparison cohorts via cross-fitting} +\usage{ +estimate_all_propensity_ratios(panel_obj, g, pairs, bs_df, K_folds, fold_id) +} +\arguments{ +\item{panel_obj}{panel object with \code{covariate_matrix} and +\code{unit_cohorts}} + +\item{g}{scalar: target treatment cohort} + +\item{pairs}{data.frame with column \code{gp}} + +\item{bs_df}{integer: B-spline df} + +\item{K_folds}{integer: number of cross-fitting folds} + +\item{fold_id}{integer vector length n: pre-generated fold assignments} +} +\value{ +named list of n-vectors, keyed by \code{as.character(gp)} +} +\description{ +For each unique \code{gp} in \code{pairs}, performs K-fold cross-fitting to +produce a full-sample n-vector of \eqn{\hat r_{g, g'}(X_i)}. +} +\keyword{internal} diff --git a/man/estimate_conditional_mean_edid.Rd b/man/estimate_conditional_mean_edid.Rd new file mode 100644 index 0000000..2bb2513 --- /dev/null +++ b/man/estimate_conditional_mean_edid.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-cov.R +\name{estimate_conditional_mean_edid} +\alias{estimate_conditional_mean_edid} +\title{Estimate the conditional mean \eqn{E[Y_s - Y_1 | G=g', X]}} +\usage{ +estimate_conditional_mean_edid( + X_train, + Y_delta_train, + G_train, + X_test, + gp, + bs_df = 4L +) +} +\arguments{ +\item{X_train}{numeric matrix n_train x d} + +\item{Y_delta_train}{numeric vector n_train: Y_s - Y_1 for all training units} + +\item{G_train}{numeric vector n_train: cohort values (Inf for never-treated)} + +\item{X_test}{numeric matrix n_test x d} + +\item{gp}{scalar: cohort to regress on (may be Inf)} + +\item{bs_df}{integer: B-spline degrees of freedom (default 4)} +} +\value{ +numeric vector length n_test +} +\description{ +Fits an OLS B-spline regression of \code{Y_delta} on \code{B(X)} using only +units with \code{G_train == gp}, then predicts for all test units. +} +\keyword{internal} diff --git a/man/estimate_propensity_ratio_edid.Rd b/man/estimate_propensity_ratio_edid.Rd new file mode 100644 index 0000000..17e9fab --- /dev/null +++ b/man/estimate_propensity_ratio_edid.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-cov.R +\name{estimate_propensity_ratio_edid} +\alias{estimate_propensity_ratio_edid} +\title{Estimate the propensity ratio r(X) = P(G=g|X) / P(G=g'|X)} +\usage{ +estimate_propensity_ratio_edid(X_train, G_train, X_test, g, gp, bs_df = 4L) +} +\arguments{ +\item{X_train}{numeric matrix n_train x d} + +\item{G_train}{numeric vector n_train: cohort values (Inf for never-treated)} + +\item{X_test}{numeric matrix n_test x d} + +\item{g}{scalar: target treatment cohort} + +\item{gp}{scalar: comparison cohort (may be Inf for never-treated)} + +\item{bs_df}{integer: B-spline degrees of freedom (default 4)} +} +\value{ +numeric vector length n_test: estimated r(X) values, >= 0 +} +\description{ +Implements the sieve (B-spline) estimator for the propensity ratio from +Chen, Sant'Anna & Xie (2025) Eq. (4.1)-(4.2). The ratio is estimated via +OLS minimising \eqn{E[r(X)^2 G_{g'} - 2 r(X) G_g]}. +} +\details{ +Closed form: +\deqn{\hat\beta = [B_{g'}' B_{g'}]^{-1} \sum_{i: G_i = g} B(X_i)} +Then \eqn{\hat r(X_i) = B(X_i)' \hat\beta}, clipped to [0, Inf). +} +\keyword{internal} diff --git a/man/fit_edid_cells.Rd b/man/fit_edid_cells.Rd new file mode 100644 index 0000000..bc3e671 --- /dev/null +++ b/man/fit_edid_cells.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-fit.R +\name{fit_edid_cells} +\alias{fit_edid_cells} +\title{Fit all (g, t) cells for the EDiD estimator} +\usage{ +fit_edid_cells( + panel_obj, + pt_assumption, + alpha, + store_eif, + xformla = NULL, + seed = NULL, + need_eif = FALSE +) +} +\arguments{ +\item{panel_obj}{panel object from \code{prepare_edid_panel()}} + +\item{pt_assumption}{character: \code{"all"} or \code{"post"}} + +\item{alpha}{significance level in (0, 1)} + +\item{store_eif}{logical: if TRUE, include EIF vectors in returned cells} + +\item{xformla}{one-sided formula or NULL: covariate formula (routed to +covariate path when non-trivial and \code{panel_obj$covariate_matrix} +is non-NULL)} + +\item{need_eif}{logical: if TRUE, always store EIF regardless of store_eif +(used internally when \code{n_bootstrap > 0})} +} +\value{ +list with elements: +\describe{ +\item{\code{cells}}{list of \code{edid_cell_result} objects} +\item{\code{eif_matrix}}{n x n_valid_cells numeric matrix, or NULL} +\item{\code{cell_index}}{data.frame: group, time, cell_id, is_pre} +} +} +\description{ +Iterates over all treatment cohorts and all time periods (excluding +\code{period_1}), computes point estimates, EIFs, and analytical SEs for +each cell. +} +\keyword{internal} diff --git a/man/generate_multiplier_weights_edid.Rd b/man/generate_multiplier_weights_edid.Rd new file mode 100644 index 0000000..c3a6775 --- /dev/null +++ b/man/generate_multiplier_weights_edid.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-bootstrap.R +\name{generate_multiplier_weights_edid} +\alias{generate_multiplier_weights_edid} +\title{Generate multiplier bootstrap weights} +\usage{ +generate_multiplier_weights_edid( + n, + n_bootstrap, + type = "rademacher", + cluster_indices = NULL, + seed = NULL +) +} +\arguments{ +\item{n}{integer: number of units} + +\item{n_bootstrap}{positive integer: number of bootstrap draws} + +\item{type}{character: \code{"rademacher"} (default), \code{"mammen"}, +or \code{"webb"}} + +\item{cluster_indices}{integer vector length n (values 1..G) or NULL} + +\item{seed}{integer seed or NULL} +} +\value{ +numeric matrix n x n_bootstrap +} +\description{ +Returns an \code{n x n_bootstrap} matrix of multiplier weights drawn from +the specified distribution. When \code{cluster_indices} is supplied, +weights are drawn at the cluster level (G x n_bootstrap) and then +expanded to unit level by repeating within cluster. +} +\keyword{internal} diff --git a/man/predict_basis_edid.Rd b/man/predict_basis_edid.Rd new file mode 100644 index 0000000..e497c9b --- /dev/null +++ b/man/predict_basis_edid.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-cov.R +\name{predict_basis_edid} +\alias{predict_basis_edid} +\title{Predict B-spline basis at new data using stored knot information} +\usage{ +predict_basis_edid(bs_obj_list, X_new_mat) +} +\arguments{ +\item{bs_obj_list}{list of length d: the \code{"bs_objects"} attribute from +\code{build_basis_matrix_edid()}} + +\item{X_new_mat}{numeric matrix, n_test x d} +} +\value{ +numeric matrix n_test x p (same column count as training basis) +} +\description{ +Evaluates the basis used during training (stored as \code{bs} objects) at +new covariate values. When the training basis fell back to a linear basis, +returns the linear approximation. +} +\keyword{internal} diff --git a/man/prepare_edid_panel.Rd b/man/prepare_edid_panel.Rd new file mode 100644 index 0000000..572a2a3 --- /dev/null +++ b/man/prepare_edid_panel.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-data.R +\name{prepare_edid_panel} +\alias{prepare_edid_panel} +\title{Prepare the panel object used throughout edid estimation} +\usage{ +prepare_edid_panel( + data, + yname, + idname, + tname, + gname, + xformla = NULL, + covariates = NULL, + clustervars = NULL, + control_group = "nevertreated", + anticipation = 0L +) +} +\arguments{ +\item{data}{data.frame (or data.table / tibble) already validated} + +\item{yname}{character scalar: outcome column name} + +\item{idname}{character scalar: unit id column name} + +\item{tname}{character scalar: time column name} + +\item{gname}{character scalar: first-treatment-period column name} + +\item{covariates}{NULL (stub)} + +\item{clustervars}{character scalar or NULL} + +\item{control_group}{\code{"nevertreated"} or \code{"notyettreated"}} + +\item{anticipation}{non-negative integer} +} +\value{ +a \code{panel_obj} list; see spec Section 5.1 +} +\description{ +Reshapes the long-format input \code{data} into a wide outcome matrix and +builds all masks and maps needed by downstream functions. +} +\keyword{internal} diff --git a/man/print.AGGTEobj_edid.Rd b/man/print.AGGTEobj_edid.Rd new file mode 100644 index 0000000..97384c5 --- /dev/null +++ b/man/print.AGGTEobj_edid.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-aggte.R +\name{print.AGGTEobj_edid} +\alias{print.AGGTEobj_edid} +\title{Print method for AGGTEobj_edid objects} +\usage{ +\method{print}{AGGTEobj_edid}(x, ...) +} +\arguments{ +\item{x}{an \code{AGGTEobj_edid} object} + +\item{...}{additional arguments (currently ignored)} +} +\value{ +\code{x} invisibly +} +\description{ +Prints aggregated treatment effects in a format similar to +\code{print.AGGTEobj}. +} diff --git a/man/print.edid_fit.Rd b/man/print.edid_fit.Rd new file mode 100644 index 0000000..c91749c --- /dev/null +++ b/man/print.edid_fit.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-methods.R +\name{print.edid_fit} +\alias{print.edid_fit} +\title{Print method for edid_fit objects} +\usage{ +\method{print}{edid_fit}(x, ...) +} +\arguments{ +\item{x}{an \code{edid_fit} object} + +\item{...}{additional arguments (currently ignored)} +} +\value{ +\code{x} invisibly +} +\description{ +Displays the ATT(g,t) table in the same style as \code{print.MP} / +\code{summary.MP}, followed by footer metadata. +} diff --git a/man/run_multiplier_bootstrap_edid.Rd b/man/run_multiplier_bootstrap_edid.Rd new file mode 100644 index 0000000..7544d22 --- /dev/null +++ b/man/run_multiplier_bootstrap_edid.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-bootstrap.R +\name{run_multiplier_bootstrap_edid} +\alias{run_multiplier_bootstrap_edid} +\title{Run the multiplier bootstrap for EDiD estimates} +\usage{ +run_multiplier_bootstrap_edid( + cells, + eif_matrix, + cell_index, + panel_obj, + n_bootstrap, + bootstrap_weights = "rademacher", + seed = NULL, + aggregate = "all", + balance_e = NULL, + alpha = 0.05 +) +} +\arguments{ +\item{cells}{list of \code{edid_cell_result} objects} + +\item{eif_matrix}{n x n_cells numeric matrix of stored EIFs} + +\item{cell_index}{data.frame with columns \code{group}, \code{time}, +\code{cell_id}, \code{is_pre}} + +\item{panel_obj}{panel object from \code{prepare_edid_panel()}} + +\item{n_bootstrap}{positive integer number of bootstrap draws} + +\item{bootstrap_weights}{character: \code{"rademacher"}, \code{"mammen"}, +or \code{"webb"}} + +\item{seed}{integer seed or NULL} + +\item{aggregate}{character: which aggregations to return} + +\item{balance_e}{integer or NULL} + +\item{alpha}{significance level} +} +\value{ +list with elements \code{overall_b}, \code{event_study_b}, +\code{group_b}, \code{n_bootstrap}, \code{weight_type}, \code{seed} +} +\description{ +Generates \code{n_bootstrap} perturbed versions of all cell-level ATTs +by multiplying stored EIF vectors with multiplier weights, then re-aggregates +using the same fixed cohort-share weights. +} +\keyword{internal} diff --git a/man/safe_inference_edid.Rd b/man/safe_inference_edid.Rd new file mode 100644 index 0000000..579e339 --- /dev/null +++ b/man/safe_inference_edid.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-inference.R +\name{safe_inference_edid} +\alias{safe_inference_edid} +\title{Safely compute SE, CI, and p-value from an EIF vector} +\usage{ +safe_inference_edid(eif, cluster_indices = NULL, alpha = 0.05, att = NA_real_) +} +\arguments{ +\item{eif}{numeric vector length n (or NULL, for NA cells)} + +\item{cluster_indices}{integer vector length n (1..G) or NULL} + +\item{alpha}{significance level in (0, 1)} + +\item{att}{scalar ATT estimate (used for t-stat; may be NA for inference check)} +} +\value{ +named list: +\code{se}, \code{ci_lower}, \code{ci_upper}, \code{t_stat}, +\code{p_value}, \code{inference_valid} +} +\description{ +Dispatches to \code{compute_eif_se_edid()} with optional cluster aggregation. +If the resulting SE is not valid (zero, NA, or non-finite), all inference +results are set to \code{NA}. +} +\keyword{internal} diff --git a/man/safe_mean_edid.Rd b/man/safe_mean_edid.Rd new file mode 100644 index 0000000..a71202a --- /dev/null +++ b/man/safe_mean_edid.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-utils.R +\name{safe_mean_edid} +\alias{safe_mean_edid} +\title{Safe mean: returns NA on empty vector instead of NaN} +\usage{ +safe_mean_edid(x) +} +\arguments{ +\item{x}{numeric vector} +} +\value{ +scalar +} +\description{ +Safe mean: returns NA on empty vector instead of NaN +} +\keyword{internal} diff --git a/man/solve_ols_edid.Rd b/man/solve_ols_edid.Rd new file mode 100644 index 0000000..030b5f1 --- /dev/null +++ b/man/solve_ols_edid.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-linalg.R +\name{solve_ols_edid} +\alias{solve_ols_edid} +\title{Weighted OLS helper} +\usage{ +solve_ols_edid(X, y, weights = NULL) +} +\arguments{ +\item{X}{numeric matrix (n x p)} + +\item{y}{numeric vector length n} + +\item{weights}{numeric vector length n (NULL = uniform)} +} +\value{ +named list with elements \code{coef}, \code{fitted}, \code{residuals} +} +\description{ +Computes \eqn{\hat\beta = (X'WX)^{-1} X'Wy} using \code{.lm.fit()}. +Falls back to SVD-based pseudoinverse if the normal equations are +numerically singular. +} +\keyword{internal} diff --git a/man/summary.AGGTEobj_edid.Rd b/man/summary.AGGTEobj_edid.Rd new file mode 100644 index 0000000..43ff451 --- /dev/null +++ b/man/summary.AGGTEobj_edid.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-aggte.R +\name{summary.AGGTEobj_edid} +\alias{summary.AGGTEobj_edid} +\title{Summary method for AGGTEobj_edid objects} +\usage{ +\method{summary}{AGGTEobj_edid}(object, ...) +} +\arguments{ +\item{object}{an \code{AGGTEobj_edid} object} + +\item{...}{additional arguments (currently ignored)} +} +\value{ +\code{object} invisibly +} +\description{ +Delegates to \code{print.AGGTEobj_edid}. +} diff --git a/man/summary.edid_fit.Rd b/man/summary.edid_fit.Rd new file mode 100644 index 0000000..d934456 --- /dev/null +++ b/man/summary.edid_fit.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-methods.R +\name{summary.edid_fit} +\alias{summary.edid_fit} +\title{Summary method for edid_fit objects} +\usage{ +\method{summary}{edid_fit}(object, ...) +} +\arguments{ +\item{object}{an \code{edid_fit} object} + +\item{...}{additional arguments (currently ignored)} +} +\value{ +\code{object} invisibly +} +\description{ +Prints a structured summary of the EDiD estimation results including +the ATT(g,t) table (in MP style) plus aggregated overall, event-study, +and group estimates. +} diff --git a/man/validate_edid_inputs.Rd b/man/validate_edid_inputs.Rd new file mode 100644 index 0000000..ab6b116 --- /dev/null +++ b/man/validate_edid_inputs.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-validate.R +\name{validate_edid_inputs} +\alias{validate_edid_inputs} +\title{Validate inputs to \code{edid()}} +\usage{ +validate_edid_inputs( + data, + yname, + idname, + tname, + gname, + xformla = NULL, + covariates, + pt_assumption, + alp, + clustervars, + control_group, + biters, + anticipation, + survey_design +) +} +\arguments{ +\item{data}{data.frame or coercible} + +\item{yname}{character scalar: outcome column name} + +\item{idname}{character scalar: unit id column name} + +\item{tname}{character scalar: time column name} + +\item{gname}{character scalar: first-treatment-period column name} + +\item{covariates}{character vector or NULL} + +\item{pt_assumption}{character scalar, already matched via \code{match.arg}} + +\item{alp}{numeric scalar in (0, 1)} + +\item{clustervars}{character scalar or NULL} + +\item{control_group}{character scalar, already matched via \code{match.arg}} + +\item{biters}{non-negative integer (internal bootstrap iterations)} + +\item{anticipation}{non-negative integer} + +\item{survey_design}{always NULL (survey not yet implemented)} +} +\value{ +invisibly TRUE +} +\description{ +Performs all structural and type checks on user-supplied arguments. +Returns invisibly \code{TRUE} on success; stops with an informative message +on any failure. +} +\keyword{internal} diff --git a/man/vcov.edid_fit.Rd b/man/vcov.edid_fit.Rd new file mode 100644 index 0000000..85e6940 --- /dev/null +++ b/man/vcov.edid_fit.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edid-methods.R +\name{vcov.edid_fit} +\alias{vcov.edid_fit} +\title{Extract variance-covariance matrix from an edid_fit object} +\usage{ +\method{vcov}{edid_fit}(object, which = c("att_gt", "overall", "event_study", "group"), ...) +} +\arguments{ +\item{object}{an \code{edid_fit} object} + +\item{which}{character: one of \code{"att_gt"}, \code{"overall"}, +\code{"event_study"}, \code{"group"}} + +\item{...}{additional arguments (ignored)} +} +\value{ +square numeric matrix +} +\description{ +Returns the outer product of aggregated EIF vectors, scaled by \eqn{1/n^2}. +When bootstrap inference is used, returns a diagonal matrix of bootstrap +variance estimates. +} diff --git a/tests/testthat/helper-edid.R b/tests/testthat/helper-edid.R new file mode 100644 index 0000000..a18796b --- /dev/null +++ b/tests/testthat/helper-edid.R @@ -0,0 +1,137 @@ +# helper-edid.R +# Shared test data factories for edid() tests. +# Auto-loaded by testthat before any test file runs. + +#' Construct a minimal one-cohort balanced panel for testing +#' +#' @param n_treat Number of treated units (cohort g=3) +#' @param n_never Number of never-treated units +#' @param n_periods Number of time periods (periods 1..n_periods) +#' @param seed RNG seed for outcome generation +#' @return data.frame with columns: unit, time, outcome, first_treat +make_panel_1cohort <- function(n_treat = 20L, n_never = 20L, + n_periods = 5L, seed = 42L) { + set.seed(seed) + n <- n_treat + n_never + units <- seq_len(n) + times <- seq_len(n_periods) + + unit_ids <- rep(units, each = n_periods) + time_ids <- rep(times, times = n) + first_treat_vals <- c(rep(3L, n_treat * n_periods), # cohort g=3 + rep(Inf, n_never * n_periods)) # never treated + + # unit fixed effects + time trend + noise + unit_fe <- rep(rnorm(n, 0, 1), each = n_periods) + time_fe <- rep(seq(0, 0.5, length.out = n_periods), times = n) + noise <- rnorm(n * n_periods, 0, 0.5) + # Treatment effect of 2 for treated units in post periods + treated_post <- (unit_ids <= n_treat) & (time_ids >= 3L) + outcome <- unit_fe + time_fe + noise + 2 * treated_post + + data.frame( + unit = unit_ids, + time = time_ids, + outcome = outcome, + first_treat = first_treat_vals, + stringsAsFactors = FALSE + ) +} + +#' Construct a two-cohort staggered balanced panel for testing +#' +#' @param n_g3 Units in cohort g=3 +#' @param n_g5 Units in cohort g=5 +#' @param n_never Never-treated units +#' @param n_periods Time periods (1..n_periods) +#' @param seed RNG seed +#' @return data.frame with columns: unit, time, outcome, first_treat +make_panel_2cohort <- function(n_g3 = 15L, n_g5 = 15L, n_never = 20L, + n_periods = 7L, seed = 123L) { + set.seed(seed) + n <- n_g3 + n_g5 + n_never + units <- seq_len(n) + times <- seq_len(n_periods) + + unit_ids <- rep(units, each = n_periods) + time_ids <- rep(times, times = n) + first_treat_vals <- c( + rep(3L, n_g3 * n_periods), + rep(5L, n_g5 * n_periods), + rep(Inf, n_never * n_periods) + ) + + unit_fe <- rep(rnorm(n, 0, 1), each = n_periods) + time_fe <- rep(seq(0, 1, length.out = n_periods), times = n) + noise <- rnorm(n * n_periods, 0, 0.5) + treated_g3 <- (unit_ids <= n_g3) & (time_ids >= 3L) + treated_g5 <- (unit_ids > n_g3 & unit_ids <= n_g3 + n_g5) & (time_ids >= 5L) + outcome <- unit_fe + time_fe + noise + 1.5 * treated_g3 + 2.5 * treated_g5 + + data.frame( + unit = unit_ids, + time = time_ids, + outcome = outcome, + first_treat = first_treat_vals, + stringsAsFactors = FALSE + ) +} + +#' Construct a one-cohort panel with cluster variable +#' +#' @param n_clusters_treat Clusters among treated units +#' @param n_clusters_never Clusters among never-treated units +#' @param units_per_cluster Units per cluster +#' @param n_periods Time periods +#' @param seed RNG seed +#' @return data.frame with columns: unit, time, outcome, first_treat, cluster_id +make_panel_clustered <- function(n_clusters_treat = 5L, + n_clusters_never = 5L, + units_per_cluster = 4L, + n_periods = 5L, + seed = 77L) { + set.seed(seed) + n_treat <- n_clusters_treat * units_per_cluster + n_never <- n_clusters_never * units_per_cluster + n <- n_treat + n_never + + units <- seq_len(n) + times <- seq_len(n_periods) + unit_ids <- rep(units, each = n_periods) + time_ids <- rep(times, times = n) + + cluster_ids <- c( + rep(seq_len(n_clusters_treat), each = units_per_cluster * n_periods), + rep(seq_len(n_clusters_never) + n_clusters_treat, each = units_per_cluster * n_periods) + ) + + first_treat_vals <- c(rep(3L, n_treat * n_periods), + rep(Inf, n_never * n_periods)) + + cluster_fe <- rep(rnorm(n_clusters_treat + n_clusters_never, 0, 0.8), + each = units_per_cluster * n_periods) + unit_fe <- rep(rnorm(n, 0, 0.3), each = n_periods) + noise <- rnorm(n * n_periods, 0, 0.2) + treated_post <- (unit_ids <= n_treat) & (time_ids >= 3L) + outcome <- cluster_fe + unit_fe + noise + 1.8 * treated_post + + data.frame( + unit = unit_ids, + time = time_ids, + outcome = outcome, + first_treat = first_treat_vals, + cluster_id = cluster_ids, + stringsAsFactors = FALSE + ) +} + +#' Minimal two-period, two-group panel (1 treated, 1 never-treated unit) +make_degenerate_panel <- function() { + data.frame( + unit = c(1, 1, 2, 2), + time = c(1, 2, 1, 2), + outcome = c(0.5, 1.2, 0.3, 0.4), + first_treat = c(2, 2, Inf, Inf), + stringsAsFactors = FALSE + ) +} diff --git a/tests/testthat/test-edid-aggregate.R b/tests/testthat/test-edid-aggregate.R new file mode 100644 index 0000000..62f9e93 --- /dev/null +++ b/tests/testthat/test-edid-aggregate.R @@ -0,0 +1,97 @@ +library(testthat) + +# Helper: run fit_edid_cells on one-cohort panel +fit_one_cohort <- function(seed = 42, n_treat = 30, n_never = 30, n_periods = 5) { + df <- make_panel_1cohort(n_treat = n_treat, n_never = n_never, + n_periods = n_periods, seed = seed) + panel <- prepare_edid_panel(df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat") + fit <- fit_edid_cells(panel, pt_assumption = "all", alpha = 0.05, + store_eif = TRUE, covariates = NULL) + list(fit = fit, panel = panel) +} + +# ============================================================ +# 7.1 aggregate_overall_edid(): basic properties +# ============================================================ +test_that("aggregate_overall_edid() returns finite ATT", { + res <- fit_one_cohort() + agg <- aggregate_overall_edid(res$fit$cells, res$fit$eif_matrix, + res$fit$cell_index, res$panel, alpha = 0.05) + expect_true(is.finite(agg$att)) +}) + +test_that("aggregate_overall_edid() ATT is within [-10, 10] for ATT=2 DGP", { + res <- fit_one_cohort(seed = 42, n_treat = 50, n_never = 50) + agg <- aggregate_overall_edid(res$fit$cells, res$fit$eif_matrix, + res$fit$cell_index, res$panel, alpha = 0.05) + expect_true(agg$att > -10 && agg$att < 10) +}) + +test_that("aggregate_overall_edid() SE is positive and finite", { + res <- fit_one_cohort() + agg <- aggregate_overall_edid(res$fit$cells, res$fit$eif_matrix, + res$fit$cell_index, res$panel, alpha = 0.05) + expect_true(is.finite(agg$se)) + expect_true(agg$se > 0) +}) + +test_that("aggregate_overall_edid() EIF aggregated vector has zero mean", { + res <- fit_one_cohort() + agg <- aggregate_overall_edid(res$fit$cells, res$fit$eif_matrix, + res$fit$cell_index, res$panel, alpha = 0.05) + expect_equal(mean(agg$eif_agg), 0, tolerance = 1e-8) +}) + +# ============================================================ +# 7.2 aggregate_event_study_edid(): structure +# ============================================================ +test_that("aggregate_event_study_edid() returns a list with entries for each relative time", { + res <- fit_one_cohort() + es <- aggregate_event_study_edid(res$fit$cells, res$fit$eif_matrix, + res$fit$cell_index, res$panel, alpha = 0.05) + # Should have entries for each unique e = t - g in the cells + expect_true(is.list(es)) + expect_true(length(es) >= 1) + for (entry in es) { + expect_true(is.finite(entry$att)) + expect_true(is.finite(entry$se)) + } +}) + +# ============================================================ +# 7.3 aggregate_group_edid(): equal-time weights +# ============================================================ +test_that("aggregate_group_edid() returns one entry per treated cohort", { + df <- make_panel_2cohort() + panel <- prepare_edid_panel(df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat") + fit <- fit_edid_cells(panel, pt_assumption = "all", alpha = 0.05, + store_eif = TRUE, covariates = NULL) + grp <- aggregate_group_edid(fit$cells, fit$eif_matrix, fit$cell_index, panel, alpha = 0.05) + expect_equal(length(grp), length(panel$treatment_groups)) + for (entry in grp) { + expect_true(is.finite(entry$att)) + } +}) + +# ============================================================ +# 7.4 WIF correction: EIF of aggregate has correct mean +# ============================================================ +test_that("compute_wif_contribution_edid() produces a length-n numeric vector", { + res <- fit_one_cohort() + wif <- compute_wif_contribution_edid( + weight_fn = function(cells, cell_idx, po) { + # simple uniform weight function for test + post_cells <- Filter(function(c) !c$is_pre && !is.na(c$att), cells) + n_post <- length(post_cells) + setNames(rep(1/n_post, n_post), seq_len(n_post)) + }, + cells = res$fit$cells, + eif_matrix = res$fit$eif_matrix, + cell_index = res$fit$cell_index, + panel_obj = res$panel + ) + expect_equal(length(wif), res$panel$n) + expect_true(is.numeric(wif)) +}) diff --git a/tests/testthat/test-edid-bootstrap.R b/tests/testthat/test-edid-bootstrap.R new file mode 100644 index 0000000..5dc8500 --- /dev/null +++ b/tests/testthat/test-edid-bootstrap.R @@ -0,0 +1,138 @@ +library(testthat) + +# ============================================================ +# 8.1 generate_multiplier_weights_edid(): Rademacher +# ============================================================ +test_that("generate_multiplier_weights_edid() Rademacher: values are in {-1, +1}", { + set.seed(42) + W <- generate_multiplier_weights_edid(n = 50L, n_bootstrap = 200L, + type = "rademacher", seed = 42L) + expect_equal(dim(W), c(50L, 200L)) + unique_vals <- unique(as.vector(W)) + expect_true(all(unique_vals %in% c(-1, 1))) +}) + +test_that("generate_multiplier_weights_edid() Rademacher: roughly 50/50 split", { + set.seed(1) + W <- generate_multiplier_weights_edid(n = 1000L, n_bootstrap = 1L, + type = "rademacher", seed = 1L) + prop_pos <- mean(W == 1) + expect_true(prop_pos > 0.4 && prop_pos < 0.6) +}) + +# ============================================================ +# 8.2 generate_multiplier_weights_edid(): Mammen +# ============================================================ +test_that("generate_multiplier_weights_edid() Mammen: values are in the two Mammen values", { + set.seed(42) + W <- generate_multiplier_weights_edid(n = 100L, n_bootstrap = 100L, + type = "mammen", seed = 42L) + mammen_lo <- -(sqrt(5) - 1) / 2 + mammen_hi <- (sqrt(5) + 1) / 2 + unique_vals <- unique(as.vector(W)) + expect_true(all(abs(unique_vals - mammen_lo) < 1e-10 | + abs(unique_vals - mammen_hi) < 1e-10)) +}) + +# ============================================================ +# 8.3 generate_multiplier_weights_edid(): Webb +# ============================================================ +test_that("generate_multiplier_weights_edid() Webb: values are among 6 Webb values", { + set.seed(42) + W <- generate_multiplier_weights_edid(n = 200L, n_bootstrap = 100L, + type = "webb", seed = 42L) + webb_vals <- c(-sqrt(3/2), -1, -sqrt(1/2), sqrt(1/2), 1, sqrt(3/2)) + unique_vals <- unique(as.vector(W)) + for (v in unique_vals) { + expect_true(any(abs(v - webb_vals) < 1e-10)) + } +}) + +# ============================================================ +# 8.4 generate_multiplier_weights_edid(): cluster expansion +# ============================================================ +test_that("generate_multiplier_weights_edid() cluster: units in same cluster get identical weight", { + set.seed(42) + G <- 5L + n <- 20L # 4 units per cluster + cluster_idx <- rep(1:G, each = 4L) + W <- generate_multiplier_weights_edid(n = n, n_bootstrap = 50L, + type = "rademacher", + cluster_indices = cluster_idx, + seed = 42L) + expect_equal(dim(W), c(n, 50L)) + # All units in cluster 1 must have the same weight for each draw + for (b in seq_len(50)) { + expect_true(length(unique(W[cluster_idx == 1L, b])) == 1L) + } +}) + +# ============================================================ +# 8.5 generate_multiplier_weights_edid(): reproducibility with seed +# ============================================================ +test_that("generate_multiplier_weights_edid() is reproducible with seed", { + W1 <- generate_multiplier_weights_edid(50L, 100L, "rademacher", seed = 777L) + W2 <- generate_multiplier_weights_edid(50L, 100L, "rademacher", seed = 777L) + expect_identical(W1, W2) +}) + +# ============================================================ +# 8.6 compute_bootstrap_stats_edid(): output structure +# ============================================================ +test_that("compute_bootstrap_stats_edid() returns named list with correct fields", { + set.seed(1) + att_hat <- 1.5 + boot_draws <- rnorm(200, mean = att_hat, sd = 0.3) + res <- compute_bootstrap_stats_edid(boot_draws, att_hat, alpha = 0.05) + expect_named(res, c("se_boot", "ci_lower", "ci_upper", "p_value_boot"), + ignore.order = TRUE) +}) + +test_that("compute_bootstrap_stats_edid() SE is positive", { + set.seed(2) + att_hat <- 0.8 + boot_draws <- rnorm(500, att_hat, 0.5) + res <- compute_bootstrap_stats_edid(boot_draws, att_hat) + expect_true(res$se_boot > 0) +}) + +test_that("compute_bootstrap_stats_edid() CI lower < upper", { + set.seed(3) + att_hat <- 2.0 + boot_draws <- rnorm(200, att_hat, 0.4) + res <- compute_bootstrap_stats_edid(boot_draws, att_hat) + expect_true(res$ci_lower < res$ci_upper) +}) + +test_that("compute_bootstrap_stats_edid() p_value in [0, 1]", { + set.seed(4) + att_hat <- 0.0 + boot_draws <- rnorm(300, 0, 0.5) + res <- compute_bootstrap_stats_edid(boot_draws, att_hat) + expect_true(res$p_value_boot >= 0 && res$p_value_boot <= 1) +}) + +# ============================================================ +# 8.7 run_multiplier_bootstrap_edid(): basic smoke test +# ============================================================ +test_that("run_multiplier_bootstrap_edid() runs without error and returns bootstrap draws", { + df <- make_panel_1cohort(seed = 77) + panel <- prepare_edid_panel(df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat") + fit <- fit_edid_cells(panel, pt_assumption = "all", alpha = 0.05, + store_eif = TRUE, covariates = NULL) + boot_res <- run_multiplier_bootstrap_edid( + cells = fit$cells, + eif_matrix = fit$eif_matrix, + cell_index = fit$cell_index, + panel_obj = panel, + n_bootstrap = 100L, + bootstrap_weights = "rademacher", + seed = 42L, + aggregate = "overall", + alpha = 0.05 + ) + expect_true(!is.null(boot_res$overall_b)) + expect_equal(length(boot_res$overall_b), 100L) + expect_true(all(is.finite(boot_res$overall_b))) +}) diff --git a/tests/testthat/test-edid-cov-basic.R b/tests/testthat/test-edid-cov-basic.R new file mode 100644 index 0000000..6bfa10d --- /dev/null +++ b/tests/testthat/test-edid-cov-basic.R @@ -0,0 +1,135 @@ +library(testthat) + +# ============================================================ +# Shared helpers +# ============================================================ + +make_panel_cov <- function(n = 90, n_periods = 6, seed = 1) { + set.seed(seed) + ids <- rep(1:n, each = n_periods) + times <- rep(1:n_periods, times = n) + g_unit <- rep(c(3, 5, Inf), each = n / 3) + g_vec <- g_unit[ids] + x1 <- rep(rnorm(n), each = n_periods) + y <- 0.5 * times + 0.2 * x1 + + as.numeric(times >= g_vec) * 1.0 + + rnorm(n * n_periods, sd = 0.5) + data.frame(id = ids, t = times, y = y, g = g_vec, x1 = x1) +} + +# ============================================================ +# Regression: xformla=NULL and xformla=~1 must be identical +# ============================================================ + +test_that("xformla=NULL and xformla=~1 produce bit-for-bit identical results", { + df <- make_panel_cov(seed = 10) + fit0 <- edid(df, "y", "id", "t", "g") + fit1 <- edid(df, "y", "id", "t", "g", xformla = ~1) + + expect_equal(fit0$overall$att, fit1$overall$att, tolerance = 1e-12) + expect_equal(fit0$overall$se, fit1$overall$se, tolerance = 1e-12) + expect_equal(fit0$att_gt$att, fit1$att_gt$att, tolerance = 1e-12) + expect_equal(fit0$att_gt$se, fit1$att_gt$se, tolerance = 1e-12) +}) + +# Confirm the ~1 path truly skips covariate estimation (covariate_matrix is NULL) +test_that("xformla=~1 routes to no-covariate path (covariate_matrix is NULL)", { + df <- make_panel_cov(seed = 11) + panel0 <- did:::prepare_edid_panel(df, "y", "id", "t", "g", xformla = ~1) + expect_null(panel0$covariate_matrix) +}) + +test_that("xformla=NULL routes to no-covariate path (covariate_matrix is NULL)", { + df <- make_panel_cov(seed = 12) + panel0 <- did:::prepare_edid_panel(df, "y", "id", "t", "g", xformla = NULL) + expect_null(panel0$covariate_matrix) +}) + +# ============================================================ +# Output structure: covariate path returns same class/slots +# ============================================================ + +test_that("covariate path returns edid_fit with all required slots", { + df <- make_panel_cov(seed = 20) + fit <- edid(df, "y", "id", "t", "g", xformla = ~ x1, seed = 1L) + + expect_s3_class(fit, "edid_fit") + expect_true(is.data.frame(fit$att_gt)) + expect_true(all(c("group", "time", "att", "se", "ci_lower", "ci_upper") %in% + names(fit$att_gt))) + expect_true(!is.null(fit$overall)) + expect_true(is.numeric(fit$overall$att)) + expect_true(is.finite(fit$overall$att)) + expect_true(is.numeric(fit$overall$se)) + expect_true(fit$overall$se > 0) +}) + +# ============================================================ +# Covariate path: ATTs are finite for all post-treatment cells +# ============================================================ + +test_that("covariate path: all post-treatment ATTs are finite", { + df <- make_panel_cov(seed = 30) + fit <- edid(df, "y", "id", "t", "g", xformla = ~ x1, seed = 1L) + post_cells <- fit$att_gt[!fit$att_gt$is_pre, ] + expect_true(nrow(post_cells) > 0L) + expect_true(all(is.finite(post_cells$att))) + expect_true(all(is.finite(post_cells$se))) + expect_true(all(post_cells$se > 0)) +}) + +# ============================================================ +# No-covariate regression: no change after adding irrelevant xformla +# NOTE: we do NOT expect them to be equal, only that both are valid +# ============================================================ + +test_that("covariate path runs without error on 2D covariate formula", { + df <- make_panel_cov(seed = 40) + df$x2 <- rep(rnorm(nrow(df) / 6), each = 6) + fit <- edid(df, "y", "id", "t", "g", xformla = ~ x1 + x2, seed = 1L) + expect_s3_class(fit, "edid_fit") + expect_true(is.finite(fit$overall$att)) +}) + +# ============================================================ +# Transformed formula: I(x1^2) supported via model.matrix() +# ============================================================ + +test_that("xformla with I(x1^2) runs and differs from ~x1 on nonlinear DGP", { + set.seed(55) + n <- 90; T <- 6 + ids <- rep(1:n, each = T) + times <- rep(1:T, times = n) + x1u <- rnorm(n) + g_u <- rep(c(3, 5, Inf), each = n / 3) + x1 <- rep(x1u, each = T) + g <- g_u[ids] + # nonlinear effect of x1 + y <- 0.5 * times + x1^2 + as.numeric(times >= g) + rnorm(n * T, sd = 0.5) + df <- data.frame(id = ids, t = times, y = y, g = g, x1 = x1) + + fit_lin <- edid(df, "y", "id", "t", "g", xformla = ~ x1, seed = 1L) + fit_quad <- edid(df, "y", "id", "t", "g", xformla = ~ x1 + I(x1^2), seed = 1L) + + # Both should run; they should not be identical (different model matrix) + expect_s3_class(fit_lin, "edid_fit") + expect_s3_class(fit_quad, "edid_fit") + expect_false(isTRUE(all.equal(fit_lin$overall$att, fit_quad$overall$att, + tolerance = 1e-6))) +}) + +# ============================================================ +# Factor covariate: supported via model.matrix() +# ============================================================ + +test_that("factor covariate is accepted and produces finite results", { + df <- make_panel_cov(seed = 60) + df$fac <- as.factor(rep(c("A", "B", "C"), length.out = nrow(df))) + # First: make factor time-invariant within unit + fac_unit <- df$fac[df$t == 1] + df$fac <- fac_unit[df$id] + + fit <- edid(df, "y", "id", "t", "g", xformla = ~ x1 + fac, seed = 1L) + expect_s3_class(fit, "edid_fit") + expect_true(is.finite(fit$overall$att)) +}) diff --git a/tests/testthat/test-edid-cov-eif.R b/tests/testthat/test-edid-cov-eif.R new file mode 100644 index 0000000..e2ce501 --- /dev/null +++ b/tests/testthat/test-edid-cov-eif.R @@ -0,0 +1,231 @@ +library(testthat) + +# ============================================================ +# Tests for the covariate-path EIF and generated outcomes. +# These tests verify the FORMULA, not just zero-mean property. +# ============================================================ + +make_simple_panel <- function(n = 120, seed = 1) { + set.seed(seed) + T <- 5 + ids <- rep(1:n, each = T) + times <- rep(1:T, times = n) + g_unit <- rep(c(3, Inf), each = n / 2) + g_vec <- g_unit[ids] + x1u <- rnorm(n) + x1 <- rep(x1u, each = T) + y <- 0.5 * times + x1 + + as.numeric(times >= g_vec) * 1.0 + + rnorm(n * T, sd = 0.4) + data.frame(id = ids, t = times, y = y, g = g_vec, x1 = x1) +} + +# ============================================================ +# EIF has (approximately) zero mean +# ============================================================ + +test_that("covariate-path EIF has near-zero mean for each cell", { + df <- make_simple_panel(n = 120, seed = 10) + fit <- edid(df, "y", "id", "t", "g", xformla = ~ x1, + store_eif = TRUE, seed = 1L) + # eif_matrix is n x n_cells + eif_mat <- fit$eif + if (!is.null(eif_mat)) { + col_means <- colMeans(eif_mat, na.rm = TRUE) + expect_true(all(abs(col_means) < 1e-10), + info = paste("EIF column means:", paste(round(col_means, 8), collapse = ", "))) + } +}) + +# ============================================================ +# SE = sqrt(sum(eif^2)/n^2): verify via manual calculation +# ============================================================ + +test_that("reported SE matches manual EIF plug-in formula for valid-inference cells", { + # Note: cells where inference_valid = FALSE (SE below eps threshold, e.g. exact + # pre-treatment zeros) will have reported SE = NA even though sqrt(sum(eif^2)/n^2) + # gives a finite (possibly 0) value. We compare only cells with finite reported SE. + df <- make_simple_panel(n = 120, seed = 20) + fit <- edid(df, "y", "id", "t", "g", xformla = ~ x1, + store_eif = TRUE, aggregate = "none", seed = 1L) + eif_mat <- fit$eif + if (!is.null(eif_mat)) { + n <- fit$n + manual_ses <- sqrt(colSums(eif_mat^2) / n^2) + reported_ses <- fit$att_gt$se + # Compare only where both are finite and reported SE > 0 + valid <- is.finite(reported_ses) & reported_ses > 0 + if (sum(valid) > 0L) { + expect_equal(manual_ses[valid], reported_ses[valid], tolerance = 1e-8, + info = "SE from EIF^2/n^2 must match reported SE for valid cells") + } + } +}) + +# ============================================================ +# EIF formula correctness: direct vs compute_eif_cov_edid() +# ============================================================ +# For a SINGLE pair (one column in gen_out_mat), the EIF is: +# EIF_i = w * phi_i - att_gt +# where w = 1 (only one pair, weights sum to 1). +# This is the reference for checking the formula exactly. + +test_that("compute_eif_cov_edid formula: weighted_phi minus att_gt, then centered", { + # Build a minimal scenario with exactly 1 pair + df <- make_simple_panel(n = 120, seed = 30) + devtools::load_all(quiet = TRUE) + + panel_obj <- did:::prepare_edid_panel(df, "y", "id", "t", "g", xformla = ~ x1) + g <- 3; t <- 3 + pairs <- did:::enumerate_valid_pairs_edid(g, panel_obj$treatment_groups, + panel_obj$time_periods, + panel_obj$period_1, "post", + panel_obj$anticipation) + fold_id <- did:::build_crossfit_folds_edid(panel_obj$n, 5L, seed = 1L) + + # Build nuisances + pairs_for_nuis <- pairs + pairs_for_nuis$gp[is.finite(pairs_for_nuis$gp) & pairs_for_nuis$gp == g] <- Inf + prop_r <- did:::estimate_all_propensity_ratios(panel_obj, g, pairs_for_nuis, + 4L, 5L, fold_id) + cond_m <- did:::estimate_all_conditional_means(panel_obj, pairs_for_nuis, t, + 4L, 5L, fold_id) + + gen_out <- did:::compute_generated_outcomes_cov_edid(panel_obj, g, t, pairs, + prop_r, cond_m, "post") + H <- ncol(gen_out) + omega <- did:::compute_omega_star_cov_edid(panel_obj, g, t, pairs, prop_r, cond_m) + weights <- did:::compute_efficient_weights_edid(omega) + att_gt <- sum(weights * colMeans(gen_out, na.rm = TRUE)) + + # Compute EIF via the function + eif_fn <- did:::compute_eif_cov_edid(panel_obj, gen_out, weights, att_gt, g) + + # Compute EIF directly: (gen_out %*% weights) - att_gt, centered + eif_ref <- drop(gen_out %*% weights) - att_gt + eif_ref <- eif_ref - mean(eif_ref) + + expect_equal(eif_fn, eif_ref, tolerance = 1e-10, + info = "compute_eif_cov_edid must implement (w^T phi) - att_gt centered") +}) + +# ============================================================ +# Wrong-sign EIF diagnostic: must be caught by SE comparison +# ============================================================ +# This is a "canary" test: if the EIF sign/scale were wrong, the implied +# SE would differ from the correct value. We verify that a deliberately +# wrong EIF produces a detectably different SE. + +test_that("wrong-sign EIF produces materially different SE", { + df <- make_simple_panel(n = 120, seed = 40) + panel_obj <- did:::prepare_edid_panel(df, "y", "id", "t", "g", xformla = ~ x1) + g <- 3; t <- 3 + pairs <- did:::enumerate_valid_pairs_edid(g, panel_obj$treatment_groups, + panel_obj$time_periods, + panel_obj$period_1, "post", + panel_obj$anticipation) + fold_id <- did:::build_crossfit_folds_edid(panel_obj$n, 5L, seed = 1L) + pairs_nuis <- pairs + pairs_nuis$gp[is.finite(pairs_nuis$gp) & pairs_nuis$gp == g] <- Inf + prop_r <- did:::estimate_all_propensity_ratios(panel_obj, g, pairs_nuis, + 4L, 5L, fold_id) + cond_m <- did:::estimate_all_conditional_means(panel_obj, pairs_nuis, t, + 4L, 5L, fold_id) + gen_out <- did:::compute_generated_outcomes_cov_edid(panel_obj, g, t, pairs, + prop_r, cond_m, "post") + omega <- did:::compute_omega_star_cov_edid(panel_obj, g, t, pairs, prop_r, cond_m) + weights <- did:::compute_efficient_weights_edid(omega) + att_gt <- sum(weights * colMeans(gen_out, na.rm = TRUE)) + n <- panel_obj$n + + # Correct EIF + eif_correct <- did:::compute_eif_cov_edid(panel_obj, gen_out, weights, att_gt, g) + se_correct <- sqrt(sum(eif_correct^2) / n^2) + + # Wrong EIF: adds (Ig/pi_g) * att_gt extra term (old bug) + pi_g <- panel_obj$cohort_fractions[[as.character(g)]] + Ig <- as.numeric(panel_obj$cohort_masks[[as.character(g)]]) + eif_wrong <- drop(gen_out %*% weights) + (Ig / pi_g) * att_gt + eif_wrong <- eif_wrong - mean(eif_wrong) + se_wrong <- sqrt(sum(eif_wrong^2) / n^2) + + # The wrong SE should differ from the correct one when ATT != 0 + if (abs(att_gt) > 0.05) { + expect_false(isTRUE(all.equal(se_correct, se_wrong, tolerance = 1e-4)), + info = "Wrong EIF (old bug) must produce detectably different SE") + } +}) + +# ============================================================ +# Generated outcomes: self-comparison pair has correct structure +# ============================================================ + +test_that("generated outcome for self-comparison pair: zero for non-g/non-inf units", { + # For a self-comparison pair (gp=g remapped to Inf): + # phi_i != 0 only for G_g or G_inf units. + # For units in other cohorts, both Ig=0 and I_inf=0, so phi_i = 0. + df <- make_simple_panel(n = 120, seed = 50) + # Add a third cohort so we have "other" units + set.seed(50) + n_units <- 120; T <- 5 + ids <- rep(1:n_units, each = T) + times <- rep(1:T, times = n_units) + g_unit <- rep(c(3, 5, Inf), each = n_units / 3) + g_vec <- g_unit[ids] + x1u <- rnorm(n_units) + x1 <- rep(x1u, each = T) + y <- 0.5 * times + x1 + as.numeric(times >= g_vec) + rnorm(n_units * T, sd = 0.4) + df2 <- data.frame(id = ids, t = times, y = y, g = g_vec, x1 = x1) + + panel_obj <- did:::prepare_edid_panel(df2, "y", "id", "t", "g", xformla = ~ x1) + g <- 3; t <- 3 + # Use PT-Post pair: single self-comparison pair (gp=g, tpre=g-1) + pairs <- did:::enumerate_valid_pairs_edid(g, panel_obj$treatment_groups, + panel_obj$time_periods, + panel_obj$period_1, "post", + panel_obj$anticipation) + # Self-comparison pair: gp == g; the code remaps to Inf + fold_id <- did:::build_crossfit_folds_edid(panel_obj$n, 5L, seed = 1L) + pairs_nuis <- pairs + pairs_nuis$gp[is.finite(pairs_nuis$gp) & pairs_nuis$gp == g] <- Inf + prop_r <- did:::estimate_all_propensity_ratios(panel_obj, g, pairs_nuis, + 4L, 5L, fold_id) + cond_m <- did:::estimate_all_conditional_means(panel_obj, pairs_nuis, t, + 4L, 5L, fold_id) + + gen_out <- did:::compute_generated_outcomes_cov_edid(panel_obj, g, t, pairs, + prop_r, cond_m, "post") + # Cohort 5 units are neither G=g nor G=Inf, so their phi should be ~0 + mask_g5 <- (panel_obj$unit_cohorts == 5) + phi_col1 <- gen_out[, 1L] + phi_g5 <- phi_col1[mask_g5] + expect_true(all(abs(phi_g5) < 1e-10), + info = paste("phi for cohort-5 units in self-pair:", round(phi_g5, 4))) +}) + +# ============================================================ +# Generated outcomes: E[phi] ≈ ATT for post-treatment cells +# ============================================================ + +test_that("mean generated outcome (weighted) approximates ATT for post-treatment cell", { + # With a linear DGP where the true ATT is known (approx 1.0), + # mean(gen_out %*% w) should be close to 1 for large enough n. + set.seed(100) + n_units <- 200; T <- 5 + ids <- rep(1:n_units, each = T) + times <- rep(1:T, times = n_units) + g_unit <- rep(c(3, Inf), each = n_units / 2) + g_vec <- g_unit[ids] + x1u <- rnorm(n_units) + x1 <- rep(x1u, each = T) + y <- 0.5 * times + 0.5 * x1 + as.numeric(times >= g_vec) + rnorm(n_units * T, sd = 0.5) + df <- data.frame(id = ids, t = times, y = y, g = g_vec, x1 = x1) + + fit <- edid(df, "y", "id", "t", "g", xformla = ~ x1, seed = 1L) + # ATT for cell (g=3, t=3) should be around 1.0 + cell_att <- fit$att_gt$att[fit$att_gt$group == 3 & fit$att_gt$time == 3] + if (length(cell_att) == 1L) { + expect_true(abs(cell_att - 1.0) < 0.5, + info = paste("ATT(3,3) =", round(cell_att, 3), "; expected ~1.0")) + } +}) diff --git a/tests/testthat/test-edid-cov-formula.R b/tests/testthat/test-edid-cov-formula.R new file mode 100644 index 0000000..a3740fd --- /dev/null +++ b/tests/testthat/test-edid-cov-formula.R @@ -0,0 +1,156 @@ +library(testthat) + +# ============================================================ +# Shared helpers +# ============================================================ + +make_panel_2cov <- function(n = 90, n_periods = 6, seed = 1) { + set.seed(seed) + ids <- rep(1:n, each = n_periods) + times <- rep(1:n_periods, times = n) + g_unit <- rep(c(3, 5, Inf), each = n / 3) + g_vec <- g_unit[ids] + x1u <- rnorm(n) + x2u <- rnorm(n) + x1 <- rep(x1u, each = n_periods) + x2 <- rep(x2u, each = n_periods) + y <- 0.5 * times + x1 + x2^2 + + as.numeric(times >= g_vec) * 1.0 + + rnorm(n * n_periods, sd = 0.5) + data.frame(id = ids, t = times, y = y, g = g_vec, x1 = x1, x2 = x2) +} + +# ============================================================ +# Time-varying covariate: must fail regardless of which period varies +# ============================================================ + +test_that("time-varying covariate (post-period change) fails before estimation", { + df <- make_panel_2cov(seed = 10) + # Only post-period change for one unit — still must be rejected + df$x1[df$id == 2 & df$t == 4] <- df$x1[df$id == 2 & df$t == 4] + 1.5 + expect_error( + edid(df, "y", "id", "t", "g", xformla = ~ x1), + regexp = "time-varying" + ) +}) + +test_that("time-varying covariate (early-period change) fails before estimation", { + df <- make_panel_2cov(seed = 11) + # Change in period 2 for one unit + df$x1[df$id == 3 & df$t == 2] <- df$x1[df$id == 3 & df$t == 1] + 0.5 + expect_error( + edid(df, "y", "id", "t", "g", xformla = ~ x1), + regexp = "time-varying" + ) +}) + +# ============================================================ +# NA covariate: both period-1 and late-period NAs rejected +# ============================================================ + +test_that("NA in covariate period 1 for one unit is rejected", { + df <- make_panel_2cov(seed = 20) + df$x1[df$id == 1 & df$t == 1] <- NA_real_ + expect_error( + edid(df, "y", "id", "t", "g", xformla = ~ x1), + regexp = "NA" + ) +}) + +test_that("NA in covariate late period for one unit is rejected", { + df <- make_panel_2cov(seed = 21) + df$x2[df$id == 5 & df$t == 5] <- NA_real_ + expect_error( + edid(df, "y", "id", "t", "g", xformla = ~ x2), + regexp = "NA" + ) +}) + +# ============================================================ +# formula semantics: model.matrix() expansion +# ============================================================ + +test_that("covariate_matrix uses model.matrix() and handles I() correctly", { + df <- make_panel_2cov(seed = 30) + p0 <- did:::prepare_edid_panel(df, "y", "id", "t", "g", xformla = ~ x1) + p1 <- did:::prepare_edid_panel(df, "y", "id", "t", "g", xformla = ~ x1 + I(x1^2)) + + n_units <- length(unique(df$id)) + expect_equal(nrow(p0$covariate_matrix), n_units) + expect_equal(ncol(p0$covariate_matrix), 1L) # just x1 + + expect_equal(nrow(p1$covariate_matrix), n_units) + expect_equal(ncol(p1$covariate_matrix), 2L) # x1, I(x1^2) + + # I(x1^2) column equals x1^2 + x1_unit <- df$x1[df$t == df$t[1]][match(sort(unique(df$id)), df$id[df$t == df$t[1]])] + expect_equal(p1$covariate_matrix[, 2L], x1_unit^2, tolerance = 1e-10) +}) + +test_that("interaction in xformla produces expected number of columns", { + df <- make_panel_2cov(seed = 31) + p <- did:::prepare_edid_panel(df, "y", "id", "t", "g", xformla = ~ x1 * x2) + n_units <- length(unique(df$id)) + expect_equal(nrow(p$covariate_matrix), n_units) + # x1, x2, x1:x2 = 3 columns + expect_equal(ncol(p$covariate_matrix), 3L) +}) + +# ============================================================ +# factor covariate: supported via model.matrix +# ============================================================ + +test_that("factor covariate is accepted and covariate_matrix has dummy columns", { + df <- make_panel_2cov(seed = 40) + fac_unit <- factor(rep(c("A", "B", "C"), each = nrow(df) / 6 / 3 + 1))[1:(nrow(df) / 6)] + df$fac <- rep(fac_unit, each = 6) + + p <- did:::prepare_edid_panel(df, "y", "id", "t", "g", xformla = ~ x1 + fac) + # model.matrix with factor "fac" (3 levels) and x1 produces 3 columns + # (x1, facB, facC) with treatment contrast; intercept is removed + n_units <- length(unique(df$id)) + expect_equal(nrow(p$covariate_matrix), n_units) + expect_true(ncol(p$covariate_matrix) >= 2L) # at least x1 + one dummy +}) + +# ============================================================ +# Formula with x1:x2 vs x1 * x2 parity (interaction) +# ============================================================ + +test_that("xformla=~x1+x2+x1:x2 and xformla=~x1*x2 produce identical covariate matrices", { + df <- make_panel_2cov(seed = 50) + p1 <- did:::prepare_edid_panel(df, "y", "id", "t", "g", + xformla = ~ x1 + x2 + x1:x2) + p2 <- did:::prepare_edid_panel(df, "y", "id", "t", "g", + xformla = ~ x1 * x2) + expect_equal(p1$covariate_matrix, p2$covariate_matrix, tolerance = 1e-10) +}) + +# ============================================================ +# Reproducibility via seed +# ============================================================ + +test_that("two calls with same seed produce identical results on covariate path", { + df <- make_panel_2cov(seed = 60) + fit1 <- edid(df, "y", "id", "t", "g", xformla = ~ x1, seed = 42L) + fit2 <- edid(df, "y", "id", "t", "g", xformla = ~ x1, seed = 42L) + + expect_equal(fit1$overall$att, fit2$overall$att, tolerance = 1e-12) + expect_equal(fit1$overall$se, fit2$overall$se, tolerance = 1e-12) + expect_equal(fit1$att_gt$att, fit2$att_gt$att, tolerance = 1e-12) +}) + +test_that("different seeds produce different fold assignments (probabilistically)", { + # With n=90 units and K=5 folds, different seeds should almost always give + # different fold vectors. + set.seed(999) + folds1 <- did:::build_crossfit_folds_edid(90L, 5L, seed = 1L) + folds2 <- did:::build_crossfit_folds_edid(90L, 5L, seed = 2L) + expect_false(identical(folds1, folds2)) +}) + +test_that("same seed always produces same fold assignments", { + folds1 <- did:::build_crossfit_folds_edid(90L, 5L, seed = 77L) + folds2 <- did:::build_crossfit_folds_edid(90L, 5L, seed = 77L) + expect_identical(folds1, folds2) +}) diff --git a/tests/testthat/test-edid-cov-validation.R b/tests/testthat/test-edid-cov-validation.R new file mode 100644 index 0000000..39c1f9e --- /dev/null +++ b/tests/testthat/test-edid-cov-validation.R @@ -0,0 +1,142 @@ +library(testthat) + +# ============================================================ +# Shared helpers +# ============================================================ + +make_panel_cov <- function(n = 60, n_periods = 6, seed = 42) { + set.seed(seed) + ids <- rep(1:n, each = n_periods) + times <- rep(1:n_periods, times = n) + cohorts <- rep(c(3, 5, Inf), each = n / 3) + g_vec <- cohorts[ids] + x1_unit <- rnorm(n) + x2_unit <- rnorm(n) + x1 <- x1_unit[ids] # time-invariant + x2 <- x2_unit[ids] + y <- 0.5 * times + 0.3 * x1 + + as.numeric(times >= g_vec) * (1 + 0.2 * x1_unit[ids]) + + rnorm(n * n_periods, sd = 0.5) + data.frame(id = ids, t = times, y = y, + g = g_vec, x1 = x1, x2 = x2) +} + +# ============================================================ +# Deprecated covariates argument +# ============================================================ + +test_that("covariates= argument errors with redirect message", { + df <- make_panel_cov() + expect_error( + edid(df, "y", "id", "t", "g", covariates = c("x1")), + regexp = "replaced by.*xformla" + ) +}) + +# ============================================================ +# xformla type check +# ============================================================ + +test_that("non-formula xformla errors immediately", { + df <- make_panel_cov() + expect_error( + edid(df, "y", "id", "t", "g", xformla = "x1"), + regexp = "one-sided formula" + ) + expect_error( + edid(df, "y", "id", "t", "g", xformla = 1L), + regexp = "one-sided formula" + ) +}) + +# ============================================================ +# Missing variable in xformla +# ============================================================ + +test_that("xformla with missing column errors informatively", { + df <- make_panel_cov() + expect_error( + edid(df, "y", "id", "t", "g", xformla = ~ nonexistent_var), + regexp = "nonexistent_var" + ) +}) + +# ============================================================ +# NA in covariates: any period +# ============================================================ + +test_that("NA in covariate column (any row) is rejected", { + df <- make_panel_cov() + + # NA in period 1 for unit 1 + df_na1 <- df + df_na1$x1[1] <- NA_real_ + expect_error( + edid(df_na1, "y", "id", "t", "g", xformla = ~ x1), + regexp = "NA values" + ) + + # NA in a later period (period 4) for unit 5 + df_na2 <- df + df_na2$x1[df_na2$id == 5 & df_na2$t == 4] <- NA_real_ + expect_error( + edid(df_na2, "y", "id", "t", "g", xformla = ~ x1), + regexp = "NA values" + ) +}) + +# ============================================================ +# Time-varying covariate rejection +# ============================================================ + +test_that("time-varying covariate is rejected with informative error", { + df <- make_panel_cov() + + # Introduce variation for unit 1 in period 2 (keep period 1 value the same) + period1_val <- df$x1[df$id == 1 & df$t == 1] + df$x1[df$id == 1 & df$t == 2] <- period1_val + 1.0 + + expect_error( + edid(df, "y", "id", "t", "g", xformla = ~ x1), + regexp = "time-varying" + ) +}) + +# ============================================================ +# ~1 formula: no error, routes to no-cov path +# ============================================================ + +test_that("xformla = ~1 runs without error and returns edid_fit", { + df <- make_panel_cov() + fit <- edid(df, "y", "id", "t", "g", xformla = ~1) + expect_s3_class(fit, "edid_fit") +}) + +# ============================================================ +# Empty formula expansion (only intercept): routes to no-cov path +# ============================================================ + +test_that("xformla with no variables silently routes to no-covariate path", { + # ~1 + 0 has no variables (all.vars() = character(0)), so + # it is treated the same as xformla = ~1: no covariate path invoked. + df <- make_panel_cov() + fit_nocov <- edid(df, "y", "id", "t", "g") + fit_10 <- edid(df, "y", "id", "t", "g", xformla = ~1 + 0) + # Both should give the same result (no-cov path) + expect_equal(fit_nocov$overall$att, fit_10$overall$att, tolerance = 1e-10) +}) + +# ============================================================ +# model.matrix() expansion errors are caught +# ============================================================ + +test_that("model.matrix() failure in xformla is caught with informative error", { + df <- make_panel_cov() + # Make x1 character (model.matrix would fail or coerce unexpectedly) + df$x1chr <- as.character(df$x1) + # character cols fail the numeric-or-factor check + expect_error( + edid(df, "y", "id", "t", "g", xformla = ~ x1chr), + regexp = "numeric or factor" + ) +}) diff --git a/tests/testthat/test-edid-cov-variance.R b/tests/testthat/test-edid-cov-variance.R new file mode 100644 index 0000000..0bf23d0 --- /dev/null +++ b/tests/testthat/test-edid-cov-variance.R @@ -0,0 +1,156 @@ +library(testthat) + +# ============================================================ +# Variance calibration tests for the covariate path. +# Use small Monte Carlo repetitions (R=50) so the testthat +# suite finishes quickly; the full simulation is in +# benchmark/edid_cov_sim.R. +# ============================================================ + +# --------------------------------------------------------------- +# DGP: balanced panel, single cohort (g=3) + never-treated, +# linear covariate effect. +# True ATT = 1.0 for all post-treatment periods. +# --------------------------------------------------------------- +sim_one_draw <- function(n, seed) { + set.seed(seed) + T <- 5 + ids <- rep(1:n, each = T) + times <- rep(1:T, times = n) + g_unit <- rep(c(3, Inf), each = n / 2) + g_vec <- g_unit[ids] + x1u <- rnorm(n) + x1 <- rep(x1u, each = T) + y <- times + 0.5 * x1 + as.numeric(times >= g_vec) + rnorm(n * T, sd = 0.5) + data.frame(id = ids, t = times, y = y, g = g_vec, x1 = x1) +} + +run_mc <- function(n, R = 50) { + results <- lapply(seq_len(R), function(r) { + df <- sim_one_draw(n, seed = r) + tryCatch({ + fit <- edid(df, "y", "id", "t", "g", xformla = ~ x1, seed = 1L, + aggregate = "none") + att_gt_df <- fit$att_gt + # post-treatment cells + post <- att_gt_df[!att_gt_df$is_pre, ] + list(att = post$att, se = post$se, + group = post$group, time = post$time) + }, error = function(e) NULL) + }) + results <- Filter(Negate(is.null), results) + results +} + +# ============================================================ +# SE vs empirical SD: ratio should be in (0.4, 2.5) for n=200 +# (loose bounds for a quick test; tight bounds in benchmark) +# ============================================================ + +test_that("covariate SE / empirical SD ratio is in (0.4, 2.5) at n=200, R=50", { + skip_on_cran() + skip_if(Sys.getenv("CI") == "true" && Sys.getenv("EDID_SLOW_TESTS") != "1", + "skipping slow variance test on CI") + + n <- 200; R <- 50 + res <- run_mc(n, R) + skip_if(length(res) < 30L, "too many MC failures; skipping") + + # Collect per-cell ATTs and SEs + all_gt <- do.call(rbind, lapply(res, function(r) { + data.frame(group = r$group, time = r$time, att = r$att, se = r$se) + })) + cell_keys <- unique(all_gt[, c("group", "time")]) + + for (k in seq_len(nrow(cell_keys))) { + g_k <- cell_keys$group[k]; t_k <- cell_keys$time[k] + sub <- all_gt[all_gt$group == g_k & all_gt$time == t_k, ] + if (nrow(sub) < 20L) next + + emp_sd <- sd(sub$att) + mean_se <- mean(sub$se, na.rm = TRUE) + ratio <- mean_se / emp_sd + + expect_true(ratio > 0.4 && ratio < 2.5, + info = sprintf("ATT(%g,%g): SE ratio = %.3f (mean_se=%.3f, emp_sd=%.3f)", + g_k, t_k, ratio, mean_se, emp_sd)) + } +}) + +# ============================================================ +# EIF plug-in SE vs empirical: ratio in (0.4, 2.5) +# ============================================================ + +test_that("EIF plug-in SE matches empirical SD in expected range at n=200", { + skip_on_cran() + skip_if(Sys.getenv("CI") == "true" && Sys.getenv("EDID_SLOW_TESTS") != "1", + "skipping slow EIF variance test on CI") + + n <- 200; R <- 50 + atts_33 <- numeric(R) + ses_33 <- numeric(R) + + for (r in seq_len(R)) { + df <- sim_one_draw(n, seed = r) + fit <- tryCatch( + edid(df, "y", "id", "t", "g", xformla = ~ x1, seed = 1L, aggregate = "none"), + error = function(e) NULL + ) + if (is.null(fit)) next + row <- fit$att_gt[fit$att_gt$group == 3 & fit$att_gt$time == 3, ] + if (nrow(row) == 0L) next + atts_33[r] <- row$att + ses_33[r] <- row$se + } + + valid <- is.finite(atts_33) & atts_33 != 0 + skip_if(sum(valid) < 20L, "insufficient valid draws") + + emp_sd <- sd(atts_33[valid]) + mean_se <- mean(ses_33[valid]) + ratio <- mean_se / emp_sd + + expect_true(ratio > 0.4 && ratio < 2.5, + info = sprintf("ATT(3,3) SE ratio = %.3f (mean_se=%.3f, emp_sd=%.3f)", + ratio, mean_se, emp_sd)) +}) + +# ============================================================ +# Coverage: rough check at n=200 (nominal 95%, accept 70-99%) +# ============================================================ + +test_that("ATT(3,3) CI coverage is roughly nominal at n=200, R=50", { + skip_on_cran() + skip_if(Sys.getenv("CI") == "true" && Sys.getenv("EDID_SLOW_TESTS") != "1", + "skipping slow coverage test on CI") + + n <- 200; R <- 50; true_att <- 1.0 + covered <- logical(R) + + for (r in seq_len(R)) { + df <- sim_one_draw(n, seed = r) + fit <- tryCatch( + edid(df, "y", "id", "t", "g", xformla = ~ x1, seed = 1L, aggregate = "none"), + error = function(e) NULL + ) + if (is.null(fit)) { covered[r] <- FALSE; next } + row <- fit$att_gt[fit$att_gt$group == 3 & fit$att_gt$time == 3, ] + if (nrow(row) == 0L) { covered[r] <- FALSE; next } + covered[r] <- (row$ci_lower <= true_att && true_att <= row$ci_upper) + } + + cov_rate <- mean(covered) + expect_true(cov_rate >= 0.65 && cov_rate <= 0.99, + info = sprintf("ATT(3,3) coverage = %.2f at n=200 (nominal 0.95)", cov_rate)) +}) + +# ============================================================ +# No-covariate path: SE unchanged by xformla=NULL vs ~1 +# ============================================================ + +test_that("no-covariate path SEs are identical for xformla=NULL and xformla=~1", { + df <- sim_one_draw(200, seed = 99) + fit0 <- edid(df, "y", "id", "t", "g") + fit1 <- edid(df, "y", "id", "t", "g", xformla = ~1) + expect_equal(fit0$att_gt$se, fit1$att_gt$se, tolerance = 1e-12) +}) diff --git a/tests/testthat/test-edid-inference.R b/tests/testthat/test-edid-inference.R new file mode 100644 index 0000000..6ca1d48 --- /dev/null +++ b/tests/testthat/test-edid-inference.R @@ -0,0 +1,97 @@ +library(testthat) + +# ============================================================ +# 6.1 compute_eif_se_edid(): correct formula +# ============================================================ +test_that("compute_eif_se_edid() returns sqrt(sum(eif^2)/n^2)", { + set.seed(42) + n <- 50L + eif <- rnorm(n, 0, 1) + se <- compute_eif_se_edid(eif, n) + expected <- sqrt(sum(eif^2) / n^2) + expect_equal(se, expected, tolerance = 1e-12) +}) + +test_that("compute_eif_se_edid() returns non-negative value", { + set.seed(1) + eif <- rnorm(100) + expect_true(compute_eif_se_edid(eif, 100) >= 0) +}) + +# ============================================================ +# 6.2 cluster_aggregate_edid(): formula +# ============================================================ +test_that("cluster_aggregate_edid() sums EIF within clusters correctly", { + # 10 units, 2 clusters of 5 each + n <- 10L + G <- 2L + cluster_idx <- c(rep(1L, 5L), rep(2L, 5L)) + set.seed(1) + eif <- rnorm(n) + + result <- cluster_aggregate_edid(eif, cluster_idx) + # result is the cluster sums (not yet centered here -- tester checks dimension) + # Cluster 1 sum: + c1_sum <- sum(eif[1:5]) + c2_sum <- sum(eif[6:10]) + # The function should return a length-G vector + expect_equal(length(result), G) +}) + +test_that("cluster_aggregate_edid() SE with clustering differs from iid SE", { + df <- make_panel_clustered(seed = 42) + panel <- prepare_edid_panel(df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat", + clustervars = "cluster_id") + pairs <- enumerate_valid_pairs_edid(3L, panel$treatment_groups, panel$time_periods, + panel$period_1, "post") + omega <- compute_omega_star_nocov_edid(3L, 4L, pairs, panel, "post") + w <- compute_efficient_weights_edid(omega) + y_hat <- compute_generated_outcomes_nocov_edid(3L, 4L, pairs, panel, "post") + att <- sum(w * y_hat) + eif <- compute_eif_nocov_edid(3L, 4L, pairs, w, panel, att, "post") + + se_iid <- compute_eif_se_edid(eif, panel$n) + clust_agg <- cluster_aggregate_edid(eif, panel$cluster_indices) + se_cluster <- safe_inference_edid(eif, panel$cluster_indices, 0.05)$se + # Cluster SE should be finite and positive + expect_true(is.finite(se_cluster)) + expect_true(se_cluster > 0) + # Cluster SE and iid SE are generally different (not testing direction) + expect_false(isTRUE(all.equal(se_iid, se_cluster))) +}) + +# ============================================================ +# 6.3 safe_inference_edid(): output structure +# ============================================================ +test_that("safe_inference_edid() returns named list with expected fields", { + set.seed(42) + eif <- rnorm(50) + res <- safe_inference_edid(eif, cluster_indices = NULL, alpha = 0.05) + expect_named(res, c("se", "ci_lower", "ci_upper", "t_stat", "p_value", "inference_valid"), + ignore.order = TRUE) +}) + +test_that("safe_inference_edid() returns inference_valid=FALSE when EIF is all zeros", { + eif <- rep(0, 50) + res <- safe_inference_edid(eif, cluster_indices = NULL, alpha = 0.05) + expect_false(res$inference_valid) + expect_true(is.na(res$se) || res$se == 0) +}) + +test_that("safe_inference_edid() CI width is positive for non-degenerate EIF", { + set.seed(10) + eif <- rnorm(100) + res <- safe_inference_edid(eif, cluster_indices = NULL, alpha = 0.05) + ci_width <- res$ci_upper - res$ci_lower + expect_true(ci_width > 0 || !res$inference_valid) +}) + +test_that("safe_inference_edid() p_value is in [0, 1]", { + set.seed(15) + eif <- rnorm(80) + res <- safe_inference_edid(eif, cluster_indices = NULL, alpha = 0.05) + if (res$inference_valid) { + expect_true(res$p_value >= 0 && res$p_value <= 1) + } +}) diff --git a/tests/testthat/test-edid-integration.R b/tests/testthat/test-edid-integration.R new file mode 100644 index 0000000..1516bd2 --- /dev/null +++ b/tests/testthat/test-edid-integration.R @@ -0,0 +1,288 @@ +library(testthat) + +# ============================================================ +# 9.1 Basic edid() call returns edid_fit object +# ============================================================ +test_that("edid() returns an edid_fit object on one-cohort panel", { + df <- make_panel_1cohort(seed = 42) + fit <- edid( + data = df, + yname = "outcome", + idname = "unit", + tname = "time", + gname = "first_treat", + pt_assumption = "all", + aggregate = "all", + bstrap = FALSE + ) + expect_s3_class(fit, "edid_fit") +}) + +# ============================================================ +# 9.2 edid_fit object fields +# ============================================================ +test_that("edid_fit contains all required top-level fields", { + df <- make_panel_1cohort(seed = 1) + fit <- edid(df, yname = "outcome", idname = "unit", tname = "time", + gname = "first_treat", + pt_assumption = "all", aggregate = "all", bstrap = FALSE) + required_fields <- c("call", "pt_assumption", "control_group", "alpha", "n", + "T_periods", "treatment_groups", "anticipation", "inference_type", + "cells", "att_gt", "overall", "event_study", "group") + for (f in required_fields) { + expect_true(f %in% names(fit), + info = paste("Missing field:", f)) + } +}) + +# ============================================================ +# 9.3 att_gt data.frame structure +# ============================================================ +test_that("edid_fit$att_gt is a data.frame with required columns", { + df <- make_panel_1cohort(seed = 2) + fit <- edid(df, yname = "outcome", idname = "unit", tname = "time", + gname = "first_treat", aggregate = "all", bstrap = FALSE) + expect_s3_class(fit$att_gt, "data.frame") + expected_cols <- c("group", "time", "att", "se", "ci_lower", "ci_upper", "p_value", "is_pre") + for (col in expected_cols) { + expect_true(col %in% names(fit$att_gt), info = paste("Missing column:", col)) + } +}) + +# ============================================================ +# 9.4 ATT estimates are finite for post-treatment cells +# ============================================================ +test_that("edid() post-treatment ATT estimates are finite", { + df <- make_panel_1cohort(seed = 3) + fit <- edid(df, yname = "outcome", idname = "unit", tname = "time", + gname = "first_treat", aggregate = "all", bstrap = FALSE) + post_cells <- fit$att_gt[!fit$att_gt$is_pre, ] + expect_true(all(is.finite(post_cells$att))) +}) + +# ============================================================ +# 9.5 PT-Post: ATT close to true value of 2 (large sample) +# ============================================================ +test_that("edid() PT-Post overall ATT close to 2 for ATT=2 DGP (large sample)", { + df <- make_panel_1cohort(n_treat = 300, n_never = 300, n_periods = 5, seed = 555) + fit <- edid(df, yname = "outcome", idname = "unit", tname = "time", + gname = "first_treat", + pt_assumption = "post", + aggregate = "overall", + bstrap = FALSE) + # True ATT = 2; allow generous tolerance for finite samples + expect_equal(fit$overall$att, 2, tolerance = 0.4) +}) + +test_that("edid() PT-All overall ATT close to 2 for ATT=2 DGP (large sample)", { + df <- make_panel_1cohort(n_treat = 300, n_never = 300, n_periods = 5, seed = 666) + fit <- edid(df, yname = "outcome", idname = "unit", tname = "time", + gname = "first_treat", + pt_assumption = "all", + aggregate = "overall", + bstrap = FALSE) + expect_equal(fit$overall$att, 2, tolerance = 0.5) +}) + +# ============================================================ +# 9.6 S3 methods work without error +# ============================================================ +test_that("print.edid_fit() runs without error", { + df <- make_panel_1cohort(seed = 4) + fit <- edid(df, yname = "outcome", idname = "unit", tname = "time", + gname = "first_treat", aggregate = "all") + expect_output(print(fit)) +}) + +test_that("summary.edid_fit() runs without error", { + df <- make_panel_1cohort(seed = 5) + fit <- edid(df, yname = "outcome", idname = "unit", tname = "time", + gname = "first_treat", aggregate = "all") + expect_output(summary(fit)) +}) + +test_that("coef.edid_fit() returns named numeric vector for att_gt", { + df <- make_panel_1cohort(seed = 6) + fit <- edid(df, yname = "outcome", idname = "unit", tname = "time", + gname = "first_treat", aggregate = "all") + coefs <- coef(fit, which = "att_gt") + expect_true(is.numeric(coefs)) + expect_true(length(coefs) > 0) + expect_false(is.null(names(coefs))) +}) + +test_that("vcov.edid_fit() returns a numeric matrix", { + df <- make_panel_1cohort(seed = 7, n_treat = 40, n_never = 40) + fit <- edid(df, yname = "outcome", idname = "unit", tname = "time", + gname = "first_treat", + aggregate = "all", store_eif = TRUE) + V <- vcov(fit, which = "att_gt") + expect_true(is.matrix(V)) + expect_true(is.numeric(V)) +}) + +test_that("as.data.frame.edid_fit() returns a data.frame", { + df <- make_panel_1cohort(seed = 8) + fit <- edid(df, yname = "outcome", idname = "unit", tname = "time", + gname = "first_treat", aggregate = "all") + df_out <- as.data.frame(fit) + expect_s3_class(df_out, "data.frame") +}) + +# ============================================================ +# 9.7 Two-cohort staggered panel +# ============================================================ +test_that("edid() two-cohort staggered panel produces two groups in group aggregation", { + df <- make_panel_2cohort(seed = 200) + fit <- edid(df, yname = "outcome", idname = "unit", tname = "time", + gname = "first_treat", + pt_assumption = "all", aggregate = "all", bstrap = FALSE) + expect_equal(length(fit$group), 2L) +}) + +test_that("edid() two-cohort: group ATTs are near true values 1.5 and 2.5 (large sample)", { + df <- make_panel_2cohort(n_g3 = 200, n_g5 = 200, n_never = 200, + n_periods = 7, seed = 300) + fit <- edid(df, yname = "outcome", idname = "unit", tname = "time", + gname = "first_treat", + pt_assumption = "post", aggregate = "group", bstrap = FALSE) + atts <- sapply(fit$group, function(x) x$att) + # Group g=3 should be near 1.5; group g=5 near 2.5 + expect_equal(unname(sort(atts)), c(1.5, 2.5), tolerance = 0.6) +}) + +# ============================================================ +# 9.8 Bootstrap integration +# ============================================================ +test_that("edid() with bstrap=TRUE returns bootstrap field in edid_fit", { + df <- make_panel_1cohort(seed = 42) + fit <- edid(df, yname = "outcome", idname = "unit", tname = "time", + gname = "first_treat", + pt_assumption = "post", aggregate = "overall", + bstrap = TRUE, biters = 50L, bootstrap_weights = "rademacher", + seed = 42L) + expect_false(is.null(fit$bootstrap)) + expect_equal(fit$bootstrap$n_bootstrap, 50L) +}) + +test_that("edid() bootstrap SE differs from analytical SE (not identical)", { + df <- make_panel_1cohort(n_treat = 40, n_never = 40, seed = 99) + fit <- edid(df, yname = "outcome", idname = "unit", tname = "time", + gname = "first_treat", + pt_assumption = "post", aggregate = "overall", + bstrap = TRUE, biters = 200L, seed = 42L) + # Bootstrap SE and analytical SE are related but not identical + # Both should be finite and positive + expect_true(is.finite(fit$overall$se)) + expect_true(fit$overall$se > 0) +}) + +# ============================================================ +# 9.9 Clustered edid() +# ============================================================ +test_that("edid() with clustervars argument produces finite clustered SEs", { + df <- make_panel_clustered(seed = 55) + fit <- edid(df, yname = "outcome", idname = "unit", tname = "time", + gname = "first_treat", + clustervars = "cluster_id", + pt_assumption = "post", aggregate = "overall", + bstrap = FALSE) + expect_true(is.finite(fit$overall$se)) + expect_true(fit$overall$se > 0) +}) + +# ============================================================ +# 9.10 Covariates stub +# ============================================================ +test_that("edid() errors with clear message when covariates are supplied", { + df <- make_panel_1cohort(seed = 1) + df$x1 <- rnorm(nrow(df)) + expect_error( + edid(df, yname = "outcome", idname = "unit", tname = "time", + gname = "first_treat", covariates = "x1"), + regexp = "covariate|not yet implemented" + ) +}) + +# ============================================================ +# 9.11 survey_design stub +# ============================================================ +test_that("edid() errors with clear message when survey_design is supplied", { + df <- make_panel_1cohort(seed = 1) + expect_error( + edid(df, yname = "outcome", idname = "unit", tname = "time", + gname = "first_treat", survey_design = list(fake = TRUE)), + regexp = "survey|not yet implemented" + ) +}) + +# ============================================================ +# 9.12 store_eif = TRUE stores the EIF matrix +# ============================================================ +test_that("edid() with store_eif=TRUE returns eif matrix of correct dimensions", { + df <- make_panel_1cohort(n_treat = 20, n_never = 20, n_periods = 4, seed = 42) + fit <- edid(df, yname = "outcome", idname = "unit", tname = "time", + gname = "first_treat", + store_eif = TRUE, aggregate = "all") + expect_false(is.null(fit$eif)) + # eif should be n x n_non_na_cells matrix + expect_equal(nrow(fit$eif), fit$n) + expect_true(ncol(fit$eif) >= 1) +}) + +# ============================================================ +# 9.13 Degenerate panel: minimal 2-period, 2-unit case +# ============================================================ +test_that("edid() on minimal degenerate panel runs without error", { + df <- make_degenerate_panel() + # PT-Post, g=2: baseline = 2-1-0 = 1 = period_1, so all cells have no valid pairs -> NA ATT + # Should return result without error (all NA cells) + expect_no_error({ + fit <- edid(df, yname = "outcome", idname = "unit", tname = "time", + gname = "first_treat", + pt_assumption = "post", aggregate = "none") + }) +}) + +# ============================================================ +# 9.14 Balanced panel enforcement +# ============================================================ +test_that("edid() errors loudly on unbalanced panel", { + df <- make_panel_1cohort(seed = 1) + df <- df[-1, ] # remove one row -> unbalanced + expect_error( + edid(df, yname = "outcome", idname = "unit", tname = "time", + gname = "first_treat"), + regexp = "balanced|unbalanced" + ) +}) + +# ============================================================ +# 9.15 Inference: CI contains true value in reasonable proportion (large n sanity check) +# ============================================================ +test_that("edid() 95% CI contains 2 for large-n ATT=2 DGP", { + df <- make_panel_1cohort(n_treat = 500, n_never = 500, n_periods = 5, seed = 1234) + fit <- edid(df, yname = "outcome", idname = "unit", tname = "time", + gname = "first_treat", + pt_assumption = "post", aggregate = "overall", bstrap = FALSE) + ci_lo <- fit$overall$ci_lower + ci_hi <- fit$overall$ci_upper + expect_true(ci_lo < 2 && ci_hi > 2, + info = paste("CI:", ci_lo, "-", ci_hi, "does not contain 2")) +}) + +# ============================================================ +# 9.16 G=0 auto-conversion (att_gt convention) +# ============================================================ +test_that("edid() accepts G=0 for never-treated and auto-converts to Inf", { + df <- make_panel_1cohort(seed = 42) + # Replace Inf with 0 to simulate att_gt convention + df$first_treat_0 <- ifelse(df$first_treat == Inf, 0, df$first_treat) + fit_0 <- edid(df, yname = "outcome", idname = "unit", tname = "time", + gname = "first_treat_0", + pt_assumption = "post", aggregate = "overall", bstrap = FALSE) + fit_inf <- edid(df, yname = "outcome", idname = "unit", tname = "time", + gname = "first_treat", + pt_assumption = "post", aggregate = "overall", bstrap = FALSE) + expect_equal(fit_0$overall$att, fit_inf$overall$att, tolerance = 1e-10) +}) diff --git a/tests/testthat/test-edid-nocov.R b/tests/testthat/test-edid-nocov.R new file mode 100644 index 0000000..4ee174a --- /dev/null +++ b/tests/testthat/test-edid-nocov.R @@ -0,0 +1,190 @@ +library(testthat) + +# ============================================================ +# 5.1 compute_omega_star_nocov_edid(): return type and dimensions +# ============================================================ +test_that("compute_omega_star_nocov_edid() returns H x H symmetric numeric matrix", { + df <- make_panel_1cohort(n_treat = 30, n_never = 30, n_periods = 5, seed = 1) + panel <- prepare_edid_panel(df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat") + pairs <- enumerate_valid_pairs_edid(3L, panel$treatment_groups, panel$time_periods, + panel$period_1, "all") + H <- nrow(pairs) + omega <- compute_omega_star_nocov_edid(3L, 4L, pairs, panel, "all") + expect_true(is.matrix(omega)) + expect_equal(dim(omega), c(H, H)) + expect_true(isSymmetric(omega, tol = 1e-10)) +}) + +test_that("compute_omega_star_nocov_edid() is positive semi-definite (eigenvalues >= 0)", { + df <- make_panel_1cohort(n_treat = 40, n_never = 40, n_periods = 5, seed = 2) + panel <- prepare_edid_panel(df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat") + pairs <- enumerate_valid_pairs_edid(3L, panel$treatment_groups, panel$time_periods, + panel$period_1, "all") + omega <- compute_omega_star_nocov_edid(3L, 4L, pairs, panel, "all") + eigs <- eigen(omega, symmetric = TRUE, only.values = TRUE)$values + expect_true(all(eigs >= -1e-10)) # PSD up to numerical noise +}) + +# ============================================================ +# 5.2 compute_omega_star_nocov_edid(): PT-Post returns 1x1 matrix +# ============================================================ +test_that("compute_omega_star_nocov_edid() returns 1x1 matrix under PT-Post", { + df <- make_panel_1cohort(n_treat = 30, n_never = 30, n_periods = 5, seed = 3) + panel <- prepare_edid_panel(df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat") + pairs <- enumerate_valid_pairs_edid(3L, panel$treatment_groups, panel$time_periods, + panel$period_1, "post") + omega <- compute_omega_star_nocov_edid(3L, 4L, pairs, panel, "post") + expect_equal(dim(omega), c(1L, 1L)) + # The 1x1 Omega* must be positive (variance is non-negative) + expect_true(omega[1, 1] >= 0) +}) + +# ============================================================ +# 5.3 compute_efficient_weights_edid(): properties +# ============================================================ +test_that("compute_efficient_weights_edid() weights sum to 1", { + df <- make_panel_1cohort(n_treat = 30, n_never = 30, n_periods = 5, seed = 4) + panel <- prepare_edid_panel(df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat") + pairs <- enumerate_valid_pairs_edid(3L, panel$treatment_groups, panel$time_periods, + panel$period_1, "all") + omega <- compute_omega_star_nocov_edid(3L, 4L, pairs, panel, "all") + w <- compute_efficient_weights_edid(omega) + expect_equal(sum(w), 1, tolerance = 1e-10) + expect_equal(length(w), nrow(pairs)) +}) + +test_that("compute_efficient_weights_edid() returns w=1 for a single pair (H=1)", { + # Construct a 1x1 omega_star + omega_1x1 <- matrix(0.5) + w <- compute_efficient_weights_edid(omega_1x1) + expect_equal(w, 1.0, tolerance = 1e-12) +}) + +test_that("compute_efficient_weights_edid() returns uniform weights when Omega* is all zeros", { + H <- 4L + omega_zero <- matrix(0, H, H) + w <- compute_efficient_weights_edid(omega_zero) + expect_equal(w, rep(1/H, H), tolerance = 1e-12) +}) + +test_that("compute_efficient_weights_edid() uses pseudoinverse fallback for singular Omega*", { + # Singular 2x2 omega (rank 1) + v <- c(1, 2) + omega_sing <- outer(v, v) * 0.1 + # Should not error; should return weights summing to 1 + w <- compute_efficient_weights_edid(omega_sing) + expect_equal(sum(w), 1, tolerance = 1e-8) + expect_equal(length(w), 2L) +}) + +test_that("compute_efficient_weights_edid() returns numeric vector with no NA or NaN", { + df <- make_panel_1cohort(n_treat = 25, n_never = 25, n_periods = 5, seed = 5) + panel <- prepare_edid_panel(df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat") + pairs <- enumerate_valid_pairs_edid(3L, panel$treatment_groups, panel$time_periods, + panel$period_1, "all") + omega <- compute_omega_star_nocov_edid(3L, 4L, pairs, panel, "all") + w <- compute_efficient_weights_edid(omega) + expect_true(all(is.finite(w))) +}) + +# ============================================================ +# 5.4 compute_generated_outcomes_nocov_edid(): shape and finiteness +# ============================================================ +test_that("compute_generated_outcomes_nocov_edid() returns length-H finite vector", { + df <- make_panel_1cohort(n_treat = 30, n_never = 30, n_periods = 5, seed = 6) + panel <- prepare_edid_panel(df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat") + pairs <- enumerate_valid_pairs_edid(3L, panel$treatment_groups, panel$time_periods, + panel$period_1, "all") + y_hat <- compute_generated_outcomes_nocov_edid(3L, 4L, pairs, panel, "all") + expect_equal(length(y_hat), nrow(pairs)) + expect_true(all(is.finite(y_hat))) +}) + +test_that("compute_generated_outcomes_nocov_edid() PT-Post returns length-1 vector", { + df <- make_panel_1cohort(n_treat = 30, n_never = 30, n_periods = 5, seed = 7) + panel <- prepare_edid_panel(df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat") + pairs <- enumerate_valid_pairs_edid(3L, panel$treatment_groups, panel$time_periods, + panel$period_1, "post") + y_hat <- compute_generated_outcomes_nocov_edid(3L, 4L, pairs, panel, "post") + expect_equal(length(y_hat), 1L) +}) + +test_that("compute_generated_outcomes_nocov_edid() ATT=2 panel: generated outcome close to 2 for post period", { + # Panel with known ATT=2 (from make_panel_1cohort default) + df <- make_panel_1cohort(n_treat = 200, n_never = 200, n_periods = 5, seed = 99) + panel <- prepare_edid_panel(df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat") + pairs <- enumerate_valid_pairs_edid(3L, panel$treatment_groups, panel$time_periods, + panel$period_1, "post") + y_hat <- compute_generated_outcomes_nocov_edid(3L, 3L, pairs, panel, "post") + # With 200 treated, 200 never-treated, and true ATT=2, should be within 0.5 of 2 + expect_equal(y_hat[1], 2, tolerance = 0.5) +}) + +# ============================================================ +# 5.5 compute_eif_nocov_edid(): shape, finiteness, zero-mean +# ============================================================ +test_that("compute_eif_nocov_edid() returns length-n finite vector", { + df <- make_panel_1cohort(n_treat = 30, n_never = 30, n_periods = 5, seed = 8) + panel <- prepare_edid_panel(df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat") + n <- panel$n + pairs <- enumerate_valid_pairs_edid(3L, panel$treatment_groups, panel$time_periods, + panel$period_1, "all") + omega <- compute_omega_star_nocov_edid(3L, 4L, pairs, panel, "all") + w <- compute_efficient_weights_edid(omega) + y_hat <- compute_generated_outcomes_nocov_edid(3L, 4L, pairs, panel, "all") + att_gt <- sum(w * y_hat) + eif <- compute_eif_nocov_edid(3L, 4L, pairs, w, panel, att_gt, "all") + expect_equal(length(eif), n) + expect_true(all(is.finite(eif))) +}) + +test_that("compute_eif_nocov_edid() has zero mean (up to numerical precision)", { + df <- make_panel_1cohort(n_treat = 30, n_never = 30, n_periods = 5, seed = 9) + panel <- prepare_edid_panel(df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat") + pairs <- enumerate_valid_pairs_edid(3L, panel$treatment_groups, panel$time_periods, + panel$period_1, "all") + omega <- compute_omega_star_nocov_edid(3L, 4L, pairs, panel, "all") + w <- compute_efficient_weights_edid(omega) + y_hat <- compute_generated_outcomes_nocov_edid(3L, 4L, pairs, panel, "all") + att_gt <- sum(w * y_hat) + eif <- compute_eif_nocov_edid(3L, 4L, pairs, w, panel, att_gt, "all") + expect_equal(mean(eif), 0, tolerance = 1e-10) +}) + +test_that("compute_eif_nocov_edid() PT-Post: EIF has zero mean", { + df <- make_panel_1cohort(n_treat = 30, n_never = 30, n_periods = 5, seed = 10) + panel <- prepare_edid_panel(df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat") + pairs <- enumerate_valid_pairs_edid(3L, panel$treatment_groups, panel$time_periods, + panel$period_1, "post") + omega <- compute_omega_star_nocov_edid(3L, 4L, pairs, panel, "post") + w <- compute_efficient_weights_edid(omega) + y_hat <- compute_generated_outcomes_nocov_edid(3L, 4L, pairs, panel, "post") + att_gt <- sum(w * y_hat) + eif <- compute_eif_nocov_edid(3L, 4L, pairs, w, panel, att_gt, "post") + expect_equal(mean(eif), 0, tolerance = 1e-10) +}) + +test_that("compute_eif_nocov_edid() sum of squared EIF is positive (non-degenerate)", { + df <- make_panel_1cohort(n_treat = 30, n_never = 30, n_periods = 5, seed = 11) + panel <- prepare_edid_panel(df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat") + pairs <- enumerate_valid_pairs_edid(3L, panel$treatment_groups, panel$time_periods, + panel$period_1, "all") + omega <- compute_omega_star_nocov_edid(3L, 4L, pairs, panel, "all") + w <- compute_efficient_weights_edid(omega) + y_hat <- compute_generated_outcomes_nocov_edid(3L, 4L, pairs, panel, "all") + att_gt <- sum(w * y_hat) + eif <- compute_eif_nocov_edid(3L, 4L, pairs, w, panel, att_gt, "all") + expect_true(sum(eif^2) > 0) +}) diff --git a/tests/testthat/test-edid-pairs-validation.R b/tests/testthat/test-edid-pairs-validation.R new file mode 100644 index 0000000..26f3171 --- /dev/null +++ b/tests/testthat/test-edid-pairs-validation.R @@ -0,0 +1,377 @@ +# test-edid-pairs-validation.R +# Validation tests for enumerate_valid_pairs_edid() based on test-spec.md +# (2026-04-13 post-builder-fix spec). +# +# Covers: U1-U11 (unit), I1-I8 (integration), R1-R4 (regression), E1-E2 (edge case) + +library(testthat) + +# =========================================================================== +# SECTION 3: Unit tests for enumerate_valid_pairs_edid() +# =========================================================================== + +# --------------------------------------------------------------------------- +# Scenario U1 — Cohorts {3,5,7}, target g=3 +# --------------------------------------------------------------------------- +test_that("U1: target_g=3, cohorts={3,5,7}, periods=1:10", { + result <- enumerate_valid_pairs_edid( + target_g = 3L, + treatment_groups = c(3L, 5L, 7L), + time_periods = 1:10, + period_1 = 1L, + pt_assumption = "all", + anticipation = 0L, + never_treated_val = Inf + ) + expect_equal(nrow(result), 10L) + expect_false(any(is.infinite(result$gp)), info = "no gp=Inf in PT-All") + expect_true(any(result$gp == 3L & result$tpre == 1L), info = "self-pair period_1 present") + expect_false(any(result$gp == 5L & result$tpre == 1L), info = "cross-pair gp=5 period_1 absent") + expect_false(any(result$gp == 7L & result$tpre == 1L), info = "cross-pair gp=7 period_1 absent") + expect_true(all(result[result$gp == 5L, "tpre"] %in% 2:4), info = "gp=5 tpre in 2:4") + expect_true(all(result[result$gp == 7L, "tpre"] %in% 2:6), info = "gp=7 tpre in 2:6") +}) + +# --------------------------------------------------------------------------- +# Scenario U2 — Cohorts {3,5,7}, target g=5 +# --------------------------------------------------------------------------- +test_that("U2: target_g=5, cohorts={3,5,7}, periods=1:10", { + result <- enumerate_valid_pairs_edid( + target_g = 5L, + treatment_groups = c(3L, 5L, 7L), + time_periods = 1:10, + period_1 = 1L, + pt_assumption = "all", + anticipation = 0L, + never_treated_val = Inf + ) + expect_equal(nrow(result), 10L) + expect_equal(nrow(result[result$gp == 3L, ]), 1L, info = "gp=3 has 1 cross-pair") + expect_equal(result[result$gp == 3L, "tpre"], 2L, info = "gp=3 tpre=2") + expect_true(any(result$gp == 5L & result$tpre == 1L), info = "self-pair period_1 present") + expect_false(any(result$gp == 3L & result$tpre == 1L), info = "cross-pair gp=3 period_1 absent") + expect_false(any(is.infinite(result$gp)), info = "no gp=Inf") +}) + +# --------------------------------------------------------------------------- +# Scenario U3 — Cohorts {3,5,7}, target g=7 +# --------------------------------------------------------------------------- +test_that("U3: target_g=7, cohorts={3,5,7}, periods=1:10", { + result <- enumerate_valid_pairs_edid( + target_g = 7L, + treatment_groups = c(3L, 5L, 7L), + time_periods = 1:10, + period_1 = 1L, + pt_assumption = "all", + anticipation = 0L, + never_treated_val = Inf + ) + expect_equal(nrow(result), 10L) + expect_true(any(result$gp == 7L & result$tpre == 1L), info = "self-pair period_1 present") + expect_false(any(result$gp == 3L & result$tpre == 1L), info = "cross gp=3 period_1 absent") + expect_false(any(result$gp == 5L & result$tpre == 1L), info = "cross gp=5 period_1 absent") + expect_equal(nrow(result[result$gp == 7L, ]), 6L, info = "gp=7 has 6 self-pairs") + expect_false(any(is.infinite(result$gp)), info = "no gp=Inf") +}) + +# --------------------------------------------------------------------------- +# Scenario U4 — Cohorts {4,7}, target g=4 +# --------------------------------------------------------------------------- +test_that("U4: target_g=4, cohorts={4,7}, periods=1:10", { + result <- enumerate_valid_pairs_edid( + target_g = 4L, + treatment_groups = c(4L, 7L), + time_periods = 1:10, + period_1 = 1L, + pt_assumption = "all", + anticipation = 0L, + never_treated_val = Inf + ) + expect_equal(nrow(result), 8L) + expect_true(all(is.finite(result$gp)), info = "all gp finite") + expect_true(any(result$gp == 4L & result$tpre == 1L), info = "self-pair period_1 present") + expect_false(any(result$gp == 7L & result$tpre == 1L), info = "cross gp=7 period_1 absent") + expect_true(all(result[result$gp == 7L, "tpre"] %in% 2:6), info = "gp=7 tpre in 2:6") +}) + +# --------------------------------------------------------------------------- +# Scenario U5 — Cohorts {4,7}, target g=7 +# --------------------------------------------------------------------------- +test_that("U5: target_g=7, cohorts={4,7}, periods=1:10", { + result <- enumerate_valid_pairs_edid( + target_g = 7L, + treatment_groups = c(4L, 7L), + time_periods = 1:10, + period_1 = 1L, + pt_assumption = "all", + anticipation = 0L, + never_treated_val = Inf + ) + expect_equal(nrow(result), 8L) + expect_true(any(result$gp == 7L & result$tpre == 1L), info = "self-pair period_1 present") + expect_false(any(result$gp == 4L & result$tpre == 1L), info = "cross gp=4 period_1 absent") + expect_equal(nrow(result[result$gp == 4L, ]), 2L, info = "gp=4 has 2 cross-pairs") + expect_false(any(is.infinite(result$gp)), info = "no gp=Inf") +}) + +# --------------------------------------------------------------------------- +# Scenario U6 — Single cohort {5} +# --------------------------------------------------------------------------- +test_that("U6: target_g=5, cohorts={5}, periods=1:10 (single cohort)", { + result <- enumerate_valid_pairs_edid( + target_g = 5L, + treatment_groups = c(5L), + time_periods = 1:10, + period_1 = 1L, + pt_assumption = "all", + anticipation = 0L, + never_treated_val = Inf + ) + expect_equal(nrow(result), 4L) + expect_true(all(result$gp == 5L), info = "all gp=5") + expect_true(any(result$tpre == 1L), info = "period_1 present") + expect_false(any(result$tpre >= 5L), info = "no tpre >= target_g") + expect_false(any(is.infinite(result$gp)), info = "no gp=Inf") +}) + +# --------------------------------------------------------------------------- +# Scenario U7 — Anticipation=1, cohorts {4,7}, target g=4 +# --------------------------------------------------------------------------- +test_that("U7: target_g=4, cohorts={4,7}, anticipation=1", { + result <- enumerate_valid_pairs_edid( + target_g = 4L, + treatment_groups = c(4L, 7L), + time_periods = 1:10, + period_1 = 1L, + pt_assumption = "all", + anticipation = 1L, + never_treated_val = Inf + ) + expect_equal(nrow(result), 6L) + expect_false(any(result$gp == 4L & result$tpre >= 3L), info = "no tpre >= eff_start(4)=3 for gp=4") + expect_false(any(result$gp == 7L & result$tpre >= 6L), info = "no tpre >= eff_start(7)=6 for gp=7") + expect_false(any(result$gp == 7L & result$tpre == 1L), info = "cross gp=7 period_1 absent") + expect_true(any(result$gp == 4L & result$tpre == 1L), info = "self-pair period_1 present") + expect_false(any(is.infinite(result$gp)), info = "no gp=Inf") +}) + +# --------------------------------------------------------------------------- +# Scenario U8 — Cross-pair with no interior tpre +# --------------------------------------------------------------------------- +test_that("U8: target_g=7, cohorts={2,7} — gp=2 has no valid cross-pair tpre", { + result <- enumerate_valid_pairs_edid( + target_g = 7L, + treatment_groups = c(2L, 7L), + time_periods = 1:10, + period_1 = 1L, + pt_assumption = "all", + anticipation = 0L, + never_treated_val = Inf + ) + expect_equal(nrow(result), 6L) + expect_false(any(result$gp == 2L), info = "no pairs for gp=2 (no interior tpre in (1,2))") + expect_true(all(result$gp == 7L), info = "all pairs have gp=7 (self)") + expect_false(any(is.infinite(result$gp)), info = "no gp=Inf") +}) + +# --------------------------------------------------------------------------- +# Scenario U9 — PT-Post: exactly one pair +# --------------------------------------------------------------------------- +test_that("U9: PT-Post target_g=4, cohorts={4,7}, periods=1:10", { + result <- enumerate_valid_pairs_edid( + target_g = 4L, + treatment_groups = c(4L, 7L), + time_periods = 1:10, + period_1 = 1L, + pt_assumption = "post", + anticipation = 0L, + never_treated_val = Inf + ) + expect_equal(nrow(result), 1L) + expect_equal(result$gp, Inf) + expect_equal(result$tpre, 3L) +}) + +# --------------------------------------------------------------------------- +# Scenario U10 — PT-Post: tpre = period_1 -> 0 rows +# --------------------------------------------------------------------------- +test_that("U10: PT-Post tpre=period_1 returns 0 pairs", { + result <- enumerate_valid_pairs_edid( + target_g = 2L, + treatment_groups = c(2L), + time_periods = 1:10, + period_1 = 1L, + pt_assumption = "post", + anticipation = 0L, + never_treated_val = Inf + ) + expect_equal(nrow(result), 0L) +}) + +# --------------------------------------------------------------------------- +# Scenario U11 — PT-Post: tpre not in time_periods -> 0 rows +# --------------------------------------------------------------------------- +test_that("U11: PT-Post tpre not in time_periods returns 0 pairs", { + result <- enumerate_valid_pairs_edid( + target_g = 5L, + treatment_groups = c(5L), + time_periods = c(1L, 3L, 5L, 7L, 9L), # even periods missing + period_1 = 1L, + pt_assumption = "post", + anticipation = 0L, + never_treated_val = Inf + ) + # tpre_val = 5 - 1 - 0 = 4; 4 NOT in time_periods + expect_equal(nrow(result), 0L) +}) + +# =========================================================================== +# SECTION 6: Edge Case Scenarios +# =========================================================================== + +# --------------------------------------------------------------------------- +# Scenario E1 — Single cohort, only period_1 pre-period +# --------------------------------------------------------------------------- +test_that("E1: target_g=2, single cohort, only period_1 as pre-period", { + result <- enumerate_valid_pairs_edid( + target_g = 2L, + treatment_groups = c(2L), + time_periods = 1:5, + period_1 = 1L, + pt_assumption = "all", + anticipation = 0L, + never_treated_val = Inf + ) + # Self-pair: tpre < 2 -> {1} = period_1. Exactly 1 row. + expect_equal(nrow(result), 1L) + expect_equal(result$gp, 2L) + expect_equal(result$tpre, 1L) +}) + +# --------------------------------------------------------------------------- +# Scenario E2 — Cross-pair cohort at effective boundary (no interior tpre) +# --------------------------------------------------------------------------- +test_that("E2: target_g=7, cohorts={3,7}, anticipation=1 — gp=3 has no valid cross-pair", { + result <- enumerate_valid_pairs_edid( + target_g = 7L, + treatment_groups = c(3L, 7L), + time_periods = 1:10, + period_1 = 1L, + pt_assumption = "all", + anticipation = 1L, + never_treated_val = Inf + ) + # eff_start(3) = 2; cross-pair condition: 1 < tpre < 2 -> no integer + expect_false(any(result$gp == 3L), info = "no pairs for gp=3") + # Self-pair (gp=7): eff_start=6, tpre < 6 incl 1 -> {1,2,3,4,5} = 5 rows + expect_equal(nrow(result), 5L) + expect_false(any(is.infinite(result$gp)), info = "no gp=Inf") +}) + +# =========================================================================== +# SECTION 5 (Regression): PT-Post path unchanged +# =========================================================================== + +test_that("R: PT-Post always produces gp=Inf pairs", { + pairs <- enumerate_valid_pairs_edid( + target_g = 5L, + treatment_groups = c(3L, 5L, 7L), + time_periods = 1:10, + period_1 = 1L, + pt_assumption = "post", + anticipation = 0L, + never_treated_val = Inf + ) + expect_equal(nrow(pairs), 1L) + expect_true(all(pairs$gp == Inf), info = "PT-Post gp must be Inf") + expect_equal(pairs$tpre, 4L) +}) + +# =========================================================================== +# SECTION 7: Property-Based Invariants (100 random inputs) +# =========================================================================== + +test_that("Property: no gp=Inf in any PT-All call", { + set.seed(42L) + for (i in seq_len(100L)) { + # Generate random staggered design + n_cohorts <- sample(2:5, 1L) + max_period <- sample(10:20, 1L) + # Cohort values: distinct, between 3 and max_period-1 + cohorts <- sort(sample(3:(max_period - 1L), n_cohorts, replace = FALSE)) + target <- cohorts[sample(seq_along(cohorts), 1L)] + periods <- seq_len(max_period) + period1 <- 1L + + pairs <- enumerate_valid_pairs_edid( + target_g = target, + treatment_groups = cohorts, + time_periods = periods, + period_1 = period1, + pt_assumption = "all", + anticipation = 0L, + never_treated_val = Inf + ) + expect_true( + all(is.finite(pairs$gp)), + info = paste0("gp=Inf found for target_g=", target, ", cohorts={", paste(cohorts, collapse=","), "}") + ) + } +}) + +test_that("Property: self-pair includes period_1 (when valid tpre exist)", { + set.seed(123L) + for (i in seq_len(50L)) { + n_cohorts <- sample(1:4, 1L) + max_period <- sample(8:15, 1L) + cohorts <- sort(sample(3:(max_period - 1L), n_cohorts, replace = FALSE)) + target <- cohorts[sample(seq_along(cohorts), 1L)] + periods <- seq_len(max_period) + + pairs <- enumerate_valid_pairs_edid( + target_g = target, + treatment_groups = cohorts, + time_periods = periods, + period_1 = 1L, + pt_assumption = "all", + anticipation = 0L + ) + if (nrow(pairs) > 0L) { + # If self-pair has any rows, period_1 should be among them + self_rows <- pairs[pairs$gp == target, ] + if (nrow(self_rows) > 0L) { + expect_true( + any(self_rows$tpre == 1L), + info = paste0("self-pair for target_g=", target, " missing period_1") + ) + } + } + } +}) + +test_that("Property: cross-pair excludes period_1", { + set.seed(456L) + for (i in seq_len(50L)) { + n_cohorts <- sample(2:5, 1L) + max_period <- sample(8:15, 1L) + cohorts <- sort(sample(3:(max_period - 1L), n_cohorts, replace = FALSE)) + target <- cohorts[sample(seq_along(cohorts), 1L)] + periods <- seq_len(max_period) + + pairs <- enumerate_valid_pairs_edid( + target_g = target, + treatment_groups = cohorts, + time_periods = periods, + period_1 = 1L, + pt_assumption = "all", + anticipation = 0L + ) + cross_rows <- pairs[pairs$gp != target, ] + if (nrow(cross_rows) > 0L) { + expect_false( + any(cross_rows$tpre == 1L), + info = paste0("cross-pair has period_1 for target_g=", target) + ) + } + } +}) diff --git a/tests/testthat/test-edid-pairs.R b/tests/testthat/test-edid-pairs.R new file mode 100644 index 0000000..507892f --- /dev/null +++ b/tests/testthat/test-edid-pairs.R @@ -0,0 +1,178 @@ +library(testthat) + +# Helper: consistent args for enumerate_valid_pairs_edid +default_treatment_groups <- c(3L, 5L) +default_time_periods <- 1:7 +default_period_1 <- 1L + +# ============================================================ +# 4.1 PT-Post: exactly one pair +# ============================================================ +test_that("enumerate_valid_pairs_edid() returns 1 pair under PT-Post for post-treatment period", { + pairs <- enumerate_valid_pairs_edid( + target_g = 3L, + treatment_groups = default_treatment_groups, + time_periods = default_time_periods, + period_1 = default_period_1, + pt_assumption = "post", + anticipation = 0L, + never_treated_val = Inf + ) + expect_equal(nrow(pairs), 1L) + expect_equal(pairs$gp[1], Inf) + expect_equal(pairs$tpre[1], 2L) # g - 1 - anticipation = 3 - 1 - 0 = 2 +}) + +test_that("enumerate_valid_pairs_edid() returns 1 pair under PT-Post with anticipation=1", { + pairs <- enumerate_valid_pairs_edid( + target_g = 3L, + treatment_groups = c(3L), + time_periods = 1:5, + period_1 = 1L, + pt_assumption = "post", + anticipation = 1L, + never_treated_val = Inf + ) + # baseline = g - 1 - anticipation = 3 - 1 - 1 = 1 = period_1 + # Since period_1 is excluded, should return 0 pairs + expect_equal(nrow(pairs), 0L) +}) + +test_that("enumerate_valid_pairs_edid() PT-Post baseline = period_1 returns 0 pairs", { + # When g - 1 - anticipation equals period_1, no valid pairs + pairs <- enumerate_valid_pairs_edid( + target_g = 2L, + treatment_groups = c(2L), + time_periods = 1:4, + period_1 = 1L, + pt_assumption = "post", + anticipation = 0L, + never_treated_val = Inf + ) + # g - 1 - 0 = 1 = period_1 -> 0 pairs + expect_equal(nrow(pairs), 0L) +}) + +# ============================================================ +# 4.2 PT-All: multiple pairs including same-cohort +# Updated 2026-04-13: gp=Inf is no longer included in PT-All; +# period_1 IS valid as tpre for self-pairs (gp == target_g). +# ============================================================ +test_that("enumerate_valid_pairs_edid() PT-All includes same-cohort comparisons but no gp=Inf", { + pairs <- enumerate_valid_pairs_edid( + target_g = 3L, + treatment_groups = c(3L, 5L), + time_periods = 1:5, + period_1 = 1L, + pt_assumption = "all", + anticipation = 0L, + never_treated_val = Inf + ) + expect_true(nrow(pairs) > 1L) + # Same-cohort comparison gp=3 must be present + expect_true(any(pairs$gp == 3L)) + # Never-treated comparison gp=Inf must NOT be present (PT-All uses only treated cohorts) + expect_false(any(is.infinite(pairs$gp))) + # period_1 IS valid as tpre for self-pair (gp=3, tpre=1) + expect_true(any(pairs$gp == 3L & pairs$tpre == 1L)) +}) + +test_that("enumerate_valid_pairs_edid() PT-All includes period_1 as tpre for self-pair", { + # Single cohort: gp=target_g is the only comparison cohort (self-pair). + # Self-pair includes period_1 as a valid tpre (degenerate CS DiD moment). + pairs <- enumerate_valid_pairs_edid( + target_g = 3L, + treatment_groups = c(3L), + time_periods = 1:5, + period_1 = 1L, + pt_assumption = "all", + anticipation = 0L, + never_treated_val = Inf + ) + # period_1=1 MUST appear as tpre for the self-pair + expect_true(1L %in% pairs$tpre) + # All pairs have finite gp (no gp=Inf) + expect_true(all(is.finite(pairs$gp))) +}) + +test_that("enumerate_valid_pairs_edid() PT-All with anticipation=1 adjusts effective treatment", { + # target_g=3, anticipation=1: eff_start(3) = 2 + # Self-pair (gp=3): valid tpre < 2, includes period_1=1 -> {1} = 1 row + # No gp=Inf in PT-All + pairs <- enumerate_valid_pairs_edid( + target_g = 3L, + treatment_groups = c(3L), + time_periods = 1:5, + period_1 = 1L, + pt_assumption = "all", + anticipation = 1L, + never_treated_val = Inf + ) + # gp=3 self-pair with eff_start=2 has tpre=1 (period_1 is valid) + expect_true(any(pairs$gp == 3L)) + expect_equal(nrow(pairs[pairs$gp == 3L, ]), 1L) + expect_equal(pairs$tpre[pairs$gp == 3L], 1L) + # No never-treated pairs in PT-All + expect_false(any(is.infinite(pairs$gp))) +}) + +# ============================================================ +# 4.3 Self-pair structure in PT-All +# Updated 2026-04-13: gp=Inf no longer exists in PT-All; +# the correct invariant is that only treated cohorts appear as gp. +# ============================================================ +test_that("enumerate_valid_pairs_edid() PT-All has only treated-cohort gp values", { + time_periods <- 1:6 + period_1 <- 1L + pairs <- enumerate_valid_pairs_edid( + target_g = 3L, + treatment_groups = c(3L), + time_periods = time_periods, + period_1 = period_1, + pt_assumption = "all", + anticipation = 0L, + never_treated_val = Inf + ) + # Only gp=3 (self-pair) should appear; no gp=Inf + expect_true(all(pairs$gp == 3L)) + expect_true(all(is.finite(pairs$gp))) + # Self-pair: tpre < 3, includes period_1=1 -> {1, 2} + expect_equal(sort(pairs$tpre), c(1L, 2L)) +}) + +# ============================================================ +# 4.4 Empty pairs cases +# ============================================================ +test_that("enumerate_valid_pairs_edid() returns 0 rows when target_g is first-ever cohort with period_1 baseline", { + # g=2, periods=1:4, period_1=1: PT-Post baseline = g-1-0=1 = period_1, so 0 pairs + pairs <- enumerate_valid_pairs_edid( + target_g = 2L, + treatment_groups = c(2L), + time_periods = 1:4, + period_1 = 1L, + pt_assumption = "post", + anticipation = 0L, + never_treated_val = Inf + ) + expect_equal(nrow(pairs), 0L) + expect_true(is.data.frame(pairs)) + expect_true("gp" %in% names(pairs)) + expect_true("tpre" %in% names(pairs)) +}) + +# ============================================================ +# 4.5 Return type +# ============================================================ +test_that("enumerate_valid_pairs_edid() always returns a data.frame with gp and tpre columns", { + pairs <- enumerate_valid_pairs_edid( + target_g = 3L, + treatment_groups = c(3L), + time_periods = 1:5, + period_1 = 1L, + pt_assumption = "all", + anticipation = 0L, + never_treated_val = Inf + ) + expect_s3_class(pairs, "data.frame") + expect_named(pairs, c("gp", "tpre")) +}) diff --git a/tests/testthat/test-edid-validate.R b/tests/testthat/test-edid-validate.R new file mode 100644 index 0000000..f5fe169 --- /dev/null +++ b/tests/testthat/test-edid-validate.R @@ -0,0 +1,265 @@ +library(testthat) +# helper-edid.R is auto-loaded + +# ============================================================ +# 3.1 Valid inputs pass without error +# ============================================================ +test_that("validate_edid_inputs() passes on valid one-cohort panel", { + df <- make_panel_1cohort() + expect_silent( + validate_edid_inputs( + data = df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat", + covariates = NULL, pt_assumption = "all", + alp = 0.05, clustervars = NULL, control_group = "nevertreated", + biters = 0L, anticipation = 0L, survey_design = NULL + ) + ) +}) + +test_that("validate_edid_inputs() passes on two-cohort panel", { + df <- make_panel_2cohort() + expect_silent( + validate_edid_inputs( + data = df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat", + covariates = NULL, pt_assumption = "post", + alp = 0.05, clustervars = NULL, control_group = "nevertreated", + biters = 100L, anticipation = 0L, survey_design = NULL + ) + ) +}) + +# ============================================================ +# 3.2 Missing column names +# ============================================================ +test_that("validate_edid_inputs() errors on missing yname column", { + df <- make_panel_1cohort() + expect_error( + validate_edid_inputs( + data = df, yname = "y_outcome", idname = "unit", + tname = "time", gname = "first_treat", + covariates = NULL, pt_assumption = "all", + alp = 0.05, clustervars = NULL, control_group = "nevertreated", + biters = 0L, anticipation = 0L, survey_design = NULL + ), + regexp = "y_outcome" + ) +}) + +test_that("validate_edid_inputs() errors on missing tname column", { + df <- make_panel_1cohort() + expect_error( + validate_edid_inputs( + data = df, yname = "outcome", idname = "unit", + tname = "t_var", gname = "first_treat", + covariates = NULL, pt_assumption = "all", + alp = 0.05, clustervars = NULL, control_group = "nevertreated", + biters = 0L, anticipation = 0L, survey_design = NULL + ), + regexp = "t_var" + ) +}) + +# ============================================================ +# 3.3 Non-numeric outcome +# ============================================================ +test_that("validate_edid_inputs() errors on character outcome column", { + df <- make_panel_1cohort() + df$outcome <- as.character(df$outcome) + expect_error( + validate_edid_inputs( + data = df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat", + covariates = NULL, pt_assumption = "all", + alp = 0.05, clustervars = NULL, control_group = "nevertreated", + biters = 0L, anticipation = 0L, survey_design = NULL + ) + ) +}) + +# ============================================================ +# 3.4 Non-finite outcomes +# ============================================================ +test_that("validate_edid_inputs() errors on Inf outcome", { + df <- make_panel_1cohort() + df$outcome[1] <- Inf + expect_error( + validate_edid_inputs( + data = df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat", + covariates = NULL, pt_assumption = "all", + alp = 0.05, clustervars = NULL, control_group = "nevertreated", + biters = 0L, anticipation = 0L, survey_design = NULL + ), + regexp = "finite|non-finite|Inf" + ) +}) + +test_that("validate_edid_inputs() errors on NA outcome", { + df <- make_panel_1cohort() + df$outcome[5] <- NA_real_ + expect_error( + validate_edid_inputs( + data = df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat", + covariates = NULL, pt_assumption = "all", + alp = 0.05, clustervars = NULL, control_group = "nevertreated", + biters = 0L, anticipation = 0L, survey_design = NULL + ) + ) +}) + +# ============================================================ +# 3.5 Unbalanced panel +# ============================================================ +test_that("validate_edid_inputs() errors on unbalanced panel", { + df <- make_panel_1cohort() + df_unbal <- df[-1, ] # drop one row -> unit 1 missing period 1 + expect_error( + validate_edid_inputs( + data = df_unbal, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat", + covariates = NULL, pt_assumption = "all", + alp = 0.05, clustervars = NULL, control_group = "nevertreated", + biters = 0L, anticipation = 0L, survey_design = NULL + ), + regexp = "balanced|unbalanced" + ) +}) + +# ============================================================ +# 3.6 Duplicate (unit, time) rows +# ============================================================ +test_that("validate_edid_inputs() errors on duplicate (idname, tname) rows", { + df <- make_panel_1cohort() + df_dup <- rbind(df, df[1, ]) # duplicate first row + expect_error( + validate_edid_inputs( + data = df_dup, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat", + covariates = NULL, pt_assumption = "all", + alp = 0.05, clustervars = NULL, control_group = "nevertreated", + biters = 0L, anticipation = 0L, survey_design = NULL + ), + regexp = "[Dd]uplicate" + ) +}) + +# ============================================================ +# 3.7 Non-absorbing treatment +# ============================================================ +test_that("validate_edid_inputs() errors on non-absorbing treatment", { + df <- make_panel_1cohort() + # Make first_treat time-varying within unit 1 + df$first_treat[df$unit == 1 & df$time == 2] <- 4L + expect_error( + validate_edid_inputs( + data = df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat", + covariates = NULL, pt_assumption = "all", + alp = 0.05, clustervars = NULL, control_group = "nevertreated", + biters = 0L, anticipation = 0L, survey_design = NULL + ), + regexp = "absorbing|time-varying|constant" + ) +}) + +# ============================================================ +# 3.8 No never-treated units +# ============================================================ +test_that("validate_edid_inputs() errors when no never-treated units and control_group='nevertreated'", { + df <- make_panel_1cohort() + # relabel all never-treated as cohort 4 + df$first_treat[df$first_treat == Inf] <- 4L + expect_error( + validate_edid_inputs( + data = df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat", + covariates = NULL, pt_assumption = "all", + alp = 0.05, clustervars = NULL, control_group = "nevertreated", + biters = 0L, anticipation = 0L, survey_design = NULL + ), + regexp = "never.treated|nevertreated" + ) +}) + +# ============================================================ +# 3.9 Covariate stub +# ============================================================ +test_that("validate_edid_inputs() errors when covariates supplied (stub)", { + df <- make_panel_1cohort() + df$x1 <- rnorm(nrow(df)) + expect_error( + validate_edid_inputs( + data = df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat", + covariates = "x1", pt_assumption = "all", + alp = 0.05, clustervars = NULL, control_group = "nevertreated", + biters = 0L, anticipation = 0L, survey_design = NULL + ), + regexp = "covariate|not yet implemented" + ) +}) + +# ============================================================ +# 3.10 Survey stub +# ============================================================ +test_that("validate_edid_inputs() errors when survey_design supplied (stub)", { + df <- make_panel_1cohort() + expect_error( + validate_edid_inputs( + data = df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat", + covariates = NULL, pt_assumption = "all", + alp = 0.05, clustervars = NULL, control_group = "nevertreated", + biters = 0L, anticipation = 0L, + survey_design = list(strata = "fake") + ), + regexp = "survey|not yet implemented" + ) +}) + +# ============================================================ +# 3.11 Invalid alp +# ============================================================ +test_that("validate_edid_inputs() errors on alp outside (0,1)", { + df <- make_panel_1cohort() + expect_error( + validate_edid_inputs( + data = df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat", + covariates = NULL, pt_assumption = "all", + alp = 1.5, clustervars = NULL, control_group = "nevertreated", + biters = 0L, anticipation = 0L, survey_design = NULL + ) + ) + expect_error( + validate_edid_inputs( + data = df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat", + covariates = NULL, pt_assumption = "all", + alp = 0, clustervars = NULL, control_group = "nevertreated", + biters = 0L, anticipation = 0L, survey_design = NULL + ) + ) +}) + +# ============================================================ +# 3.12 Cluster column time-varying check +# ============================================================ +test_that("validate_edid_inputs() errors on time-varying cluster variable", { + df <- make_panel_clustered() + # Make cluster_id time-varying for unit 1 + df$cluster_id[df$unit == 1 & df$time == 2] <- 999L + expect_error( + validate_edid_inputs( + data = df, yname = "outcome", idname = "unit", + tname = "time", gname = "first_treat", + covariates = NULL, pt_assumption = "all", + alp = 0.05, clustervars = "cluster_id", control_group = "nevertreated", + biters = 0L, anticipation = 0L, survey_design = NULL + ), + regexp = "cluster|time.invariant|time-invariant" + ) +})