diff --git a/.github/workflows/bump-version.yaml b/.github/workflows/bump-version.yaml
new file mode 100644
index 00000000..325aa0b4
--- /dev/null
+++ b/.github/workflows/bump-version.yaml
@@ -0,0 +1,79 @@
+name: Bump dev version on PR merge
+
+# Increments the fourth ("dev") component of the Version field in DESCRIPTION
+# on the base branch each time a pull request is merged.
+#
+# NOTE(review): for pull requests opened from forks, GITHUB_TOKEN is usually
+# read-only on `pull_request` events, so the push below may be rejected for
+# those merges -- confirm against the repository's Actions settings.
+
+on:
+  pull_request:
+    types: [closed]
+    branches: [master, main]
+
+jobs:
+  bump-version:
+    if: github.event.pull_request.merged == true
+    runs-on: ubuntu-latest
+
+    # Serialize bumps per base branch so two merges in quick succession do
+    # not race each other on the push.
+    concurrency:
+      group: bump-version-${{ github.event.pull_request.base.ref }}
+      cancel-in-progress: false
+
+    permissions:
+      contents: write
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.pull_request.base.ref }}
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Bump dev version in DESCRIPTION
+        run: |
+          # Extract the current version, tolerating extra whitespace (or a
+          # trailing carriage return) around the value.
+          current=$(grep -m1 '^Version:' DESCRIPTION || true)
+          current=$(printf '%s' "$current" |
+            sed -E 's/^Version:[[:space:]]*//; s/[[:space:]]*$//')
+          echo "Current version: $current"
+
+          # Accept only major.minor.patch[.dev] built from digits ('.' or '-'
+          # separators). Anything else used to yield a malformed version such
+          # as "1.2..1"; validating first also keeps unexpected text out of
+          # the arithmetic expansion and the sed expression below.
+          version_re='^[0-9]+[.-][0-9]+[.-][0-9]+([.-][0-9]+)?$'
+          if [[ ! "$current" =~ $version_re ]]; then
+            echo "::error::Unsupported Version field in DESCRIPTION: '$current'"
+            exit 1
+          fi
+
+          # Split into parts
+          IFS='.-' read -ra parts <<< "$current"
+          major="${parts[0]}"
+          minor="${parts[1]}"
+          patch="${parts[2]}"
+          dev="${parts[3]:-0}"
+
+          # Increment dev version; 10# forces base 10 so a component with a
+          # leading zero (e.g. "08") is not parsed as octal.
+          new_dev=$((10#$dev + 1))
+          new_version="${major}.${minor}.${patch}.${new_dev}"
+          echo "New version: $new_version"
+
+          # Update DESCRIPTION
+          sed -i "s/^Version:.*/Version: ${new_version}/" DESCRIPTION
+
+          # Configure git
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+
+          # Commit and push only when DESCRIPTION actually changed
+          git add DESCRIPTION
+          if ! git diff --cached --quiet; then
+            git commit -m "Bump version to ${new_version}"
+            git push
+          fi
diff --git a/DESCRIPTION b/DESCRIPTION index e239a2ea..7d2b5a33 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: did Title: Treatment Effects with Multiple Periods and Groups -Version: 2.3.1.903 +Version: 2.3.1.904 Authors@R: c(person("Brantly", "Callaway", email = "brantly.callaway@uga.edu", role = c("aut", "cre")), person("Pedro H. 
C.", "Sant'Anna", email="pedro.santanna@emory.edu", role = c("aut"))) URL: https://bcallaway11.github.io/did/, https://github.com/bcallaway11/did/ Description: The standard Difference-in-Differences (DID) setup involves two periods and two groups -- a treated group and untreated group. Many applications of DID methods involve more than two periods and have individuals that are treated at different points in time. This package contains tools for computing average treatment effect parameters in Difference in Differences setups with more than two periods and with variation in treatment timing using the methods developed in Callaway and Sant'Anna (2021) . The main parameters are group-time average treatment effects which are the average treatment effect for a particular group at a a particular time. These can be aggregated into a fewer number of treatment effect parameters, and the package deals with the cases where there is selective treatment timing, dynamic treatment effects, calendar time effects, or combinations of these. There are also functions for testing the Difference in Differences assumption, and plotting group-time average treatment effects. 
diff --git a/NAMESPACE b/NAMESPACE index c5dab2b9..804c402f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -38,12 +38,15 @@ export(splot) export(test.mboot) export(tidy) export(trimmer) -import(BMisc) import(data.table) import(fastglm) import(ggplot2) -import(stats) -import(utils) +importFrom(BMisc,TorF) +importFrom(BMisc,getListElement) +importFrom(BMisc,makeBalancedPanel) +importFrom(BMisc,multiplier_bootstrap) +importFrom(BMisc,rhs.vars) +importFrom(BMisc,toformula) importFrom(DRDID,drdid_panel) importFrom(DRDID,drdid_rc) importFrom(DRDID,reg_did_panel) @@ -55,5 +58,20 @@ importFrom(generics,glance) importFrom(generics,tidy) importFrom(methods,as) importFrom(methods,is) +importFrom(stats,aggregate) +importFrom(stats,binomial) +importFrom(stats,complete.cases) +importFrom(stats,cov) +importFrom(stats,model.frame) +importFrom(stats,model.matrix) +importFrom(stats,na.pass) importFrom(stats,nobs) +importFrom(stats,pchisq) +importFrom(stats,pnorm) +importFrom(stats,qnorm) +importFrom(stats,quantile) +importFrom(stats,rnorm) +importFrom(stats,setNames) +importFrom(stats,var) importFrom(tidyr,gather) +importFrom(utils,globalVariables) diff --git a/NEWS.md b/NEWS.md index 6760543c..66a9a8bb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,31 @@ +# did 2.3.1.904 + + * Fixed bug where `faster_mode = TRUE` and `faster_mode = FALSE` produced different ATT estimates when sampling weights (`weightsname`) vary across time. The fast path was always using first-period weights; it now correctly uses the same period's weights as the slow path + + * New `fix_weights` argument in `att_gt()` gives users explicit control over how time-varying sampling weights are resolved in each 2x2 DiD comparison. Options: `NULL` (default, preserves existing behavior), `"varying"` (per-observation weights using RC estimators), `"base_period"` (fix at g-1 for all cells), `"first_period"` (fix at first period). 
See `?att_gt` for details + + * Runtime message when time-varying weights are detected in balanced panel data, directing users to the `fix_weights` argument + + * Reduced namespace pollution: replaced blanket `import(stats)`, `import(utils)`, and `import(BMisc)` with selective `importFrom()` calls. The `did` package no longer re-exports `stats::filter` or `stats::lag`, which previously masked `dplyr::filter` and `dplyr::lag` when both packages were loaded + + * Fixed `aggte()` crash (`"Error in get(gname): invalid first argument"`) when the user's group column is literally named `gname` and `dreamerr` >= 1.5.0 is installed. The issue was `dreamerr` intercepting `data.table`'s `get()` inside `[.data.table`; replaced with `set()` which is immune to this + + * Expanded `weightsname` documentation explaining how time-varying weights are handled differently for balanced panels vs. repeated cross sections and unbalanced panels + + * Added `nobs()` S3 methods for `MP` and `AGGTEobj` objects, returning the number of unique cross-sectional units as an integer + + * Added `statistic` (t-statistic) and `p.value` (pointwise, two-sided) columns to `tidy()` output for both `MP` and `AGGTEobj` objects, following `broom` conventions + + * Added `broom` to `Suggests` + +# did 2.3.1.903 + + * Added `nobs()` S3 methods and `statistic`/`p.value` columns to `tidy()` output (superseded by 2.3.1.904 entry above) + +# did 2.3.1.902 + + * Bug fixes, diagnostic improvements, and JEL replication tests + # did 2.3.1.901 * `att_gt()` now accepts `...` (dots) for passing additional arguments to custom `est_method` functions diff --git a/R/DIDparams.R b/R/DIDparams.R index 7aafd3a5..7953d8a5 100644 --- a/R/DIDparams.R +++ b/R/DIDparams.R @@ -25,6 +25,7 @@ DIDparams <- function(yname, control_group, anticipation=0, weightsname=NULL, + fix_weights=NULL, alp=0.05, bstrap=TRUE, biters=1000, @@ -54,6 +55,7 @@ DIDparams <- function(yname, control_group=control_group, anticipation=anticipation, 
weightsname=weightsname, + fix_weights=fix_weights, alp=alp, bstrap=bstrap, biters=biters, diff --git a/R/DIDparams2.R b/R/DIDparams2.R index 108b50bf..8c88a2c0 100644 --- a/R/DIDparams2.R +++ b/R/DIDparams2.R @@ -49,6 +49,8 @@ DIDparams2 <- function(did_tensors, args, call=NULL) { covariates_matrix <- did_tensors$covariates_matrix cluster_vector <- did_tensors$cluster weights_vector <- did_tensors$weights + weights_tensor <- did_tensors$weights_tensor + fix_weights <- args$fix_weights out <- list(yname=yname, @@ -89,6 +91,8 @@ DIDparams2 <- function(did_tensors, args, call=NULL) { covariates_matrix = covariates_matrix, cluster_vector=cluster_vector, weights_vector=weights_vector, + weights_tensor=weights_tensor, + fix_weights=fix_weights, call=call) class(out) <- "DIDparams" return(out) diff --git a/R/att_gt.R b/R/att_gt.R index b5663b41..ce086373 100644 --- a/R/att_gt.R +++ b/R/att_gt.R @@ -16,7 +16,50 @@ #' It defines which "group" a unit belongs to. It should be 0 for units #' in the untreated group. #' @param weightsname The name of the column containing the sampling weights. -#' If not set, all observations have same weight. +#' If not set, all observations have same weight. When weights are +#' time-invariant (constant within each unit across periods), all +#' \code{fix_weights} options produce identical results and no special +#' handling is needed. +#' +#' When weights vary across time (e.g., time-varying population sizes), +#' the default behavior differs by panel type: +#' \describe{ +#' \item{Balanced panel}{Each 2x2 DiD comparison uses the weight from the +#' earlier of the two time periods involved. For post-treatment cells, +#' this is the base period (g-1). For pre-treatment cells with +#' \code{base_period="varying"}, this is the pre-treatment period itself. 
+#' The panel DRDID estimators are used.} +#' \item{Repeated cross sections and unbalanced panels}{Both periods' +#' per-observation weights are passed directly to the RC DRDID estimators, +#' so each observation carries its own period-specific weight.} +#' } +#' Use the \code{fix_weights} argument to override the default behavior. +#' @param fix_weights Controls how time-varying sampling weights are resolved. +#' Only relevant when weights vary across time; with time-invariant weights, +#' all options produce identical results. Options: +#' \describe{ +#' \item{\code{NULL} (default)}{For balanced panel: uses the weight from +#' the earlier of the two time periods in each 2x2 comparison. For +#' post-treatment cells, this is the base period (g-1). For +#' pre-treatment cells, this depends on the \code{base_period} setting. +#' For RC/unbalanced panel: uses per-observation weights from both +#' periods.} +#' \item{\code{"varying"}}{Uses per-observation, period-specific weights +#' for all panel types. For balanced panel data, this switches to the +#' repeated cross-section DRDID estimators so that pre-period and +#' post-period observations each carry their own weight. This is the +#' most flexible option but sacrifices the efficiency of the panel +#' estimator. For RC/unbalanced panel, this is identical to the +#' default.} +#' \item{\code{"base_period"}}{Fixes weights at the base period (g-1) for +#' all (g,t) cells within a group, for both pre-treatment and +#' post-treatment comparisons. Ensures all cells within a group use the +#' same weights. For RC/unbalanced panel, units not observed in the base +#' period are dropped with a warning.} +#' \item{\code{"first_period"}}{Fixes weights at the first time period in +#' the dataset for all (g,t) cells. 
For RC/unbalanced panel, units not +#' observed in the first period are dropped with a warning.} +#' } #' @param alp the significance level, default is 0.05 #' @param bstrap Boolean for whether or not to compute standard errors using #' the multiplier bootstrap. If standard errors are clustered, then one @@ -195,6 +238,7 @@ att_gt <- function(yname, control_group = c("nevertreated", "notyettreated"), anticipation = 0, weightsname = NULL, + fix_weights = NULL, alp = 0.05, bstrap = TRUE, cband = TRUE, @@ -217,6 +261,14 @@ att_gt <- function(yname, "\". Extra arguments are only passed to custom est_method functions.") } + # Validate fix_weights + if (!is.null(fix_weights)) { + if (!is.character(fix_weights) || length(fix_weights) != 1 || + !(fix_weights %in% c("varying", "base_period", "first_period"))) { + stop("fix_weights must be NULL or one of \"varying\", \"base_period\", or \"first_period\".") + } + } + # Validate est_method if (!inherits(est_method, "function")) { if (!is.character(est_method) || length(est_method) != 1) { @@ -249,6 +301,7 @@ att_gt <- function(yname, control_group = control_group, anticipation = anticipation, weightsname = weightsname, + fix_weights = fix_weights, alp = alp, bstrap = bstrap, cband = cband, @@ -284,6 +337,7 @@ att_gt <- function(yname, control_group = control_group, anticipation = anticipation, weightsname = weightsname, + fix_weights = fix_weights, alp = alp, bstrap = bstrap, cband = cband, diff --git a/R/compute.aggte.R b/R/compute.aggte.R index a93ac752..ec02770e 100644 --- a/R/compute.aggte.R +++ b/R/compute.aggte.R @@ -57,7 +57,7 @@ compute.aggte <- function(MP, } if (isTRUE(dp$faster_mode)) { dt <- dp$data - dt[get(gname) == Inf, (gname) := 0] # going back to the old way + set(dt, i = which(dt[[gname]] == Inf), j = gname, value = 0) # going back to the old way data <- as.data.frame(dt) rm(dt) tlist <- dp$time_periods diff --git a/R/compute.att_gt.R b/R/compute.att_gt.R index bef72c36..ffa8af7e 100644 --- 
a/R/compute.att_gt.R +++ b/R/compute.att_gt.R @@ -65,10 +65,20 @@ compute.att_gt <- function(dp) { # never treated option nevertreated <- (control_group[1] == "nevertreated") + fix_weights <- dp$fix_weights + # Pre-extract columns to avoid repeated get() inside data.table (which is slow) g_col <- data[[gname]] t_col <- data[[tname]] + # Build weight lookup by period for fix_weights options (balanced panel only) + if (!is.null(fix_weights) && panel) { + weights_by_period <- list() + for (tp in seq_along(tlist)) { + weights_by_period[[tp]] <- data[t_col == tlist[tp], .w] + } + } + if (nevertreated) { set(data, j = ".C", value = as.integer(g_col == 0)) } @@ -193,7 +203,20 @@ compute.att_gt <- function(dp) { # base period, then the "base period" is actually the later period Ypre <- if (tlist[(t + tfac)] > tlist[pret]) disdat$.y0 else disdat$.y1 Ypost <- if (tlist[(t + tfac)] > tlist[pret]) disdat$.y1 else disdat$.y0 - w <- disdat$.w + + # Select weights based on fix_weights + if (is.null(fix_weights)) { + # Default: .w from get_wide_data (earlier period) + w <- disdat$.w + } else if (fix_weights == "base_period") { + w <- weights_by_period[[pret_g]][disidx] + } else if (fix_weights == "first_period") { + w <- weights_by_period[[1L]][disidx] + } else if (fix_weights == "varying") { + w <- disdat$.w # will be overridden below when switching to RC estimator + } else { + w <- disdat$.w + } # matrix of covariates covariates <- model.matrix(xformla, data = disdat) @@ -247,7 +270,39 @@ compute.att_gt <- function(dp) { #----------------------------------------------------------------------------- attgt <- tryCatch({ - if (inherits(est_method, "function")) { + if (!is.null(fix_weights) && fix_weights == "varying") { + # fix_weights = "varying": use RC estimators with per-period weights + # Go back to long-format data for this (g,t) cell + disdat_long <- data[time_mask] + disdat_long_idx <- disdat_long$.G == 1 | disdat_long$.C == 1 + disdat_long <- disdat_long[disdat_long_idx] + 
Y_rc <- disdat_long[[yname]] + G_rc <- disdat_long$.G + post_rc <- as.numeric(disdat_long[[tname]] == tlist[t + tfac]) + w_rc <- disdat_long$.w + covariates_rc <- model.matrix(xformla, data = disdat_long) + n1_rc <- sum(G_rc + disdat_long$.C) # careful: n1 for RC is different + + if (inherits(est_method, "function")) { + res <- do.call(est_method, c(list( + y = Y_rc, post = post_rc, + D = G_rc, covariates = covariates_rc, + i.weights = w_rc, inffunc = TRUE + ), extra_args)) + } else if (est_method == "ipw") { + res <- DRDID::std_ipw_did_rc(Y_rc, post_rc, G_rc, + covariates = covariates_rc, + i.weights = w_rc, boot = FALSE, inffunc = TRUE) + } else if (est_method == "reg") { + res <- DRDID::reg_did_rc(Y_rc, post_rc, G_rc, + covariates = covariates_rc, + i.weights = w_rc, boot = FALSE, inffunc = TRUE) + } else { + res <- DRDID::drdid_rc(Y_rc, post_rc, G_rc, + covariates = covariates_rc, + i.weights = w_rc, boot = FALSE, inffunc = TRUE) + } + } else if (inherits(est_method, "function")) { # user-specified function res <- do.call(est_method, c(list( y1 = Ypost, y0 = Ypre, @@ -281,7 +336,16 @@ compute.att_gt <- function(dp) { # adjust influence function to account for only using # subgroup to estimate att(g,t) - res$att.inf.func <- (n / n1) * res$att.inf.func + if (!is.null(fix_weights) && fix_weights == "varying") { + # RC influence function has 2*n1 rows (stacked pre + post); + # aggregate back to unit level by summing pre and post contributions + inf_rc <- res$att.inf.func + n1_half <- length(inf_rc) %/% 2L + res$att.inf.func <- inf_rc[1:n1_half] + inf_rc[(n1_half + 1):(2 * n1_half)] + res$att.inf.func <- (n / n1) * res$att.inf.func + } else { + res$att.inf.func <- (n / n1) * res$att.inf.func + } res }, error = function(e) { warning("Error computing internal 2x2 DiD for (g, t) = (", glist[g], ", ", tlist[t + tfac], "): ", e$message, ". 
The ATT for this cell will be set to NA.") @@ -325,7 +389,38 @@ compute.att_gt <- function(dp) { post <- 1 * (disdat[[tname]] == tlist[t + tfac]) # num obs. for computing ATT(g,t), have to be careful here n1 <- sum(G + C) - w <- disdat$.w + + # Handle fix_weights for RC/unbalanced panel + if (!is.null(fix_weights) && fix_weights %in% c("base_period", "first_period")) { + # Determine which period's weight to use + if (fix_weights == "base_period") { + target_period <- tlist[pret_g] + } else { + target_period <- tlist[1] + } + # Build lookup: weight from target period per unit + target_rows <- data[t_col == target_period, ] + target_w <- stats::setNames(target_rows$.w, target_rows$.rowid) + # Look up weight for each observation's unit + w <- as.numeric(target_w[as.character(disdat$.rowid)]) + # Drop units not observed in the target period + missing_w <- is.na(w) + if (any(missing_w)) { + n_dropped <- length(unique(disdat$.rowid[missing_w])) + warning(paste0("Dropped ", n_dropped, " units not observed in ", + fix_weights, " (period ", target_period, ") ", + "for group ", glist[g], " in time period ", tlist[t + tfac])) + disdat <- disdat[!missing_w, ] + G <- disdat$.G + C <- disdat$.C + Y <- disdat[[yname]] + post <- 1 * (disdat[[tname]] == tlist[t + tfac]) + n1 <- sum(G + C) + w <- w[!missing_w] + } + } else { + w <- disdat$.w + } #----------------------------------------------------------------------------- # checks to make sure that we have enough observations diff --git a/R/compute.att_gt2.R b/R/compute.att_gt2.R index bfdf3eae..7d7e16d9 100644 --- a/R/compute.att_gt2.R +++ b/R/compute.att_gt2.R @@ -88,12 +88,12 @@ get_did_cohort_index <- function(group, time, tfac, pret, dp2){ #' #' @return A list containing the estimated ATT and the influence function vector. 
#' @noRd -run_DRDID <- function(cohort_data, covariates, dp2, g_val = NULL, t_val = NULL){ +run_DRDID <- function(cohort_data, covariates, dp2, g_val = NULL, t_val = NULL, force_rc = FALSE){ extra_args <- if (is.null(dp2$extra_args)) list() else dp2$extra_args gt_label <- if (!is.null(g_val) && !is.null(t_val)) paste0(" for group ", g_val, " in time period ", t_val) else "" - if(dp2$panel){ + if(dp2$panel && !force_rc){ # -------------------------------------- # Panel Data # -------------------------------------- @@ -376,25 +376,88 @@ run_att_gt_estimation <- function(g, t, dp2){ if(dp2$panel){ - cohort_data <- data.table(did_cohort_index, dp2$outcomes_tensor[[t+tfac]], dp2$outcomes_tensor[[pret]], dp2$weights_vector) - names(cohort_data) <- c("D", "y1", "y0", "i.weights") - covariates <- dp2$covariates_tensor[[base::min(pret, t)]] + # Determine which weight period to use based on fix_weights + use_rc_for_weights <- (!is.null(dp2$fix_weights) && dp2$fix_weights == "varying") + + if (use_rc_for_weights) { + # fix_weights = "varying": stack into RC format with per-period weights + n_units <- length(did_cohort_index) + cohort_data <- data.table( + D = rep(did_cohort_index, 2), + y = c(dp2$outcomes_tensor[[pret]], dp2$outcomes_tensor[[t+tfac]]), + post = rep(c(0L, 1L), each = n_units), + i.weights = c(dp2$weights_tensor[[pret]], dp2$weights_tensor[[t+tfac]]) + ) + # Stack covariates for both periods + cov_pre <- dp2$covariates_tensor[[pret]] + cov_post <- dp2$covariates_tensor[[t+tfac]] + if (is.matrix(cov_pre)) { + covariates <- rbind(cov_pre, cov_post) + } else { + covariates <- c(cov_pre, cov_post) + } + } else { + # Default or fixed weight options: use panel estimator with single weight vector + if (is.null(dp2$fix_weights)) { + # Default: weight from earlier of the two periods + w_idx <- base::min(pret, t) + } else if (dp2$fix_weights == "base_period") { + w_idx <- dp2$.pret_by_group[g] + } else if (dp2$fix_weights == "first_period") { + w_idx <- 1L + } + 
cohort_data <- data.table(did_cohort_index, dp2$outcomes_tensor[[t+tfac]], + dp2$outcomes_tensor[[pret]], dp2$weights_tensor[[w_idx]]) + names(cohort_data) <- c("D", "y1", "y0", "i.weights") + covariates <- dp2$covariates_tensor[[base::min(pret, t)]] + } } else { log_vec <- dp2$time_invariant_data[[ dp2$tname ]] == dp2$time_periods[t+tfac] # convert TRUE/FALSE to 1/0 in place (fastest) set(dp2$time_invariant_data, j = "post", value = as.integer(log_vec)) - cohort_data <- data.table(did_cohort_index, dp2$time_invariant_data[[dp2$yname]], dp2$time_invariant_data$post, dp2$time_invariant_data$weights, dp2$time_invariant_data$.rowid) + + # Handle fix_weights for RC/unbalanced panel + if (!is.null(dp2$fix_weights) && dp2$fix_weights %in% c("base_period", "first_period")) { + if (dp2$fix_weights == "base_period") { + target_period <- dp2$time_periods[dp2$.pret_by_group[g]] + } else { + target_period <- dp2$time_periods[1] + } + # Build weight lookup from target period + tid <- dp2$time_invariant_data + target_mask <- tid[[dp2$tname]] == target_period + target_ids <- tid[[dp2$idname]][target_mask] + target_ws <- tid[["weights"]][target_mask] + target_w_lookup <- stats::setNames(target_ws, as.character(target_ids)) + # Look up weight for each observation + obs_ids <- as.character(tid[[dp2$idname]]) + fixed_w <- as.numeric(target_w_lookup[obs_ids]) + # Units not in target period get NA weight — will be filtered in run_DRDID + cohort_data <- data.table(did_cohort_index, tid[[dp2$yname]], tid$post, fixed_w, tid$.rowid) + } else { + cohort_data <- data.table(did_cohort_index, dp2$time_invariant_data[[dp2$yname]], dp2$time_invariant_data$post, dp2$time_invariant_data$weights, dp2$time_invariant_data$.rowid) + } names(cohort_data) <- c("D", "y", "post", "i.weights", ".rowid") covariates <- dp2$covariates_matrix } # run estimation - did_result <- tryCatch(run_DRDID(cohort_data, covariates, dp2, g_val = dp2$treated_groups[g], t_val = dp2$time_periods[t+tfac]), + force_rc <- if 
(exists("use_rc_for_weights") && isTRUE(use_rc_for_weights)) TRUE else FALSE + did_result <- tryCatch(run_DRDID(cohort_data, covariates, dp2, g_val = dp2$treated_groups[g], t_val = dp2$time_periods[t+tfac], force_rc = force_rc), error = function(e) { warning("Error computing internal 2x2 DiD for (g, t) = (", dp2$treated_groups[g], ", ", dp2$time_periods[t+tfac], "): ", e$message, ". The ATT for this cell will be set to NA.") return(NULL) }) + + # When force_rc on balanced panel, the influence function has 2*id_count rows + # (stacked pre + post). Aggregate back to id_count by summing pre + post contributions. + if (force_rc && !is.null(did_result) && dp2$panel) { + inf <- did_result$inf_func + n_half <- length(inf) %/% 2L + did_result$inf_func <- inf[1:n_half] + inf[(n_half + 1):(2L * n_half)] + } + return(did_result) } diff --git a/R/imports.R b/R/imports.R index b2bc0bf3..ea0c8ad7 100644 --- a/R/imports.R +++ b/R/imports.R @@ -6,10 +6,12 @@ #' @keywords internal "_PACKAGE" -#' @import stats -#' @import utils +#' @importFrom stats pnorm qnorm pchisq quantile cov aggregate setNames +#' model.frame model.matrix na.pass complete.cases binomial rnorm var +#' @importFrom utils globalVariables #' @import ggplot2 -#' @import BMisc +#' @importFrom BMisc toformula rhs.vars makeBalancedPanel getListElement +#' multiplier_bootstrap TorF #' @import data.table #' @import fastglm #' @importFrom tidyr gather diff --git a/R/pre_process_did.R b/R/pre_process_did.R index 3ee13c5e..e51caf90 100644 --- a/R/pre_process_did.R +++ b/R/pre_process_did.R @@ -21,6 +21,7 @@ pre_process_did <- function(yname, control_group = c("nevertreated","notyettreated"), anticipation = 0, weightsname = NULL, + fix_weights = NULL, alp = 0.05, bstrap = FALSE, cband = FALSE, @@ -95,6 +96,20 @@ pre_process_did <- function(yname, if (".w" %in% colnames(data)) stop("Your data already contains a column named '.w', which is reserved for internal use by `did`. 
Please rename this column before calling att_gt().") data$.w <- w + # Check for time-varying weights in panel data + if (!is.null(weightsname) && panel) { + w_by_id <- tapply(data[, weightsname], data[, idname], function(x) max(x) - min(x)) + if (any(w_by_id > .Machine$double.eps^0.5, na.rm = TRUE)) { + message( + "Time-varying weights detected. For balanced panel data, the default ", + "behavior uses the weight from the earlier of the two time periods in ", + "each 2x2 comparison (the base period for post-treatment cells). ", + "Use the 'fix_weights' argument to control this behavior. ", + "See ?att_gt for details." + ) + } + } + # Outcome variable will be denoted by y # data$.y <- data[, yname] @@ -389,6 +404,7 @@ pre_process_did <- function(yname, control_group=control_group, anticipation=anticipation, weightsname=weightsname, + fix_weights=fix_weights, alp=alp, bstrap=bstrap, biters=biters, diff --git a/R/pre_process_did2.R b/R/pre_process_did2.R index 733fbe56..dd3f52ae 100644 --- a/R/pre_process_did2.R +++ b/R/pre_process_did2.R @@ -131,6 +131,20 @@ did_standardization <- function(data, args){ weights <- weights/mean(weights) data$weights <- weights + # Check for time-varying weights in panel data + if (!is.null(args$weightsname) && args$panel) { + w_range <- data[, .(w_range = max(weights) - min(weights)), by = get(args$idname)] + if (any(w_range$w_range > .Machine$double.eps^0.5, na.rm = TRUE)) { + message( + "Time-varying weights detected. For balanced panel data, the default ", + "behavior uses the weight from the earlier of the two time periods in ", + "each 2x2 comparison (the base period for post-treatment cells). ", + "Use the 'fix_weights' argument to control this behavior. ", + "See ?att_gt for details." 
+ ) + } + } + # get a list of dates from min to max tlist <- data[, sort(unique(get(args$tname)))] @@ -419,6 +433,13 @@ get_did_tensors <- function(data, args){ start <- (time - 1L) * n + 1L outcomes_tensor[[time]] <- y_vec[start:(start + n - 1L)] } + # Build weights tensor: one weight vector per time period + w_vec <- data[["weights"]] + weights_tensor <- vector("list", nT) + for(time in seq_len(nT)){ + start <- (time - 1L) * n + 1L + weights_tensor[[time]] <- w_vec[start:(start + n - 1L)] + } } else { # for(time in args$time_periods){ # outcome_vector_time <- rep(NA, args$id_count) # Initialize vector with NAs @@ -430,6 +451,7 @@ get_did_tensors <- function(data, args){ # data[, outcome_vector_time := NULL] # } outcomes_tensor <- NULL + weights_tensor <- NULL } # Getting the time invariant data @@ -533,7 +555,8 @@ get_did_tensors <- function(data, args){ covariates_matrix = covariates_matrix, covariates_tensor = covariates_tensor, cluster = cluster, - weights = weights)) + weights = weights, + weights_tensor = weights_tensor)) } #' @title Process `did` Function Arguments @@ -559,6 +582,7 @@ pre_process_did2 <- function(yname, control_group = c("nevertreated","notyettreated"), anticipation = 0, weightsname = NULL, + fix_weights = NULL, alp = 0.05, bstrap = FALSE, cband = FALSE, diff --git a/R/utility_functions.R b/R/utility_functions.R index 05e1a6d0..40ed9309 100644 --- a/R/utility_functions.R +++ b/R/utility_functions.R @@ -86,10 +86,10 @@ get_wide_data <- function(data, yname, idname, tname) { check_balance <- function(data, id_col, time_col) { # Count the number of observations per unit (idname) - panel_counts <- data[, .N, by = get(id_col)] + panel_counts <- data[, .N, by = c(id_col)] # Determine the maximum number of time periods for any unit - max_time_periods <- data[, uniqueN(get(time_col))] + max_time_periods <- data[, uniqueN(data[[time_col]])] # Check if every unit has the same number of time periods as max_time_periods is_balanced <- 
all(panel_counts$N == max_time_periods) diff --git a/man/DIDparams.Rd b/man/DIDparams.Rd index 30c0ff65..66890966 100644 --- a/man/DIDparams.Rd +++ b/man/DIDparams.Rd @@ -14,6 +14,7 @@ DIDparams( control_group, anticipation = 0, weightsname = NULL, + fix_weights = NULL, alp = 0.05, bstrap = TRUE, biters = 1000, @@ -86,7 +87,51 @@ in the treatment where units can anticipate participating in the treatment and therefore it can affect their untreated potential outcomes} \item{weightsname}{The name of the column containing the sampling weights. -If not set, all observations have same weight.} +If not set, all observations have same weight. When weights are +time-invariant (constant within each unit across periods), all +\code{fix_weights} options produce identical results and no special +handling is needed. + +When weights vary across time (e.g., time-varying population sizes), +the default behavior differs by panel type: +\describe{ +\item{Balanced panel}{Each 2x2 DiD comparison uses the weight from the +earlier of the two time periods involved. For post-treatment cells, +this is the base period (g-1). For pre-treatment cells with +\code{base_period="varying"}, this is the pre-treatment period itself. +The panel DRDID estimators are used.} +\item{Repeated cross sections and unbalanced panels}{Both periods' +per-observation weights are passed directly to the RC DRDID estimators, +so each observation carries its own period-specific weight.} +} +Use the \code{fix_weights} argument to override the default behavior.} + +\item{fix_weights}{Controls how time-varying sampling weights are resolved. +Only relevant when weights vary across time; with time-invariant weights, +all options produce identical results. Options: +\describe{ +\item{\code{NULL} (default)}{For balanced panel: uses the weight from +the earlier of the two time periods in each 2x2 comparison. For +post-treatment cells, this is the base period (g-1). 
For +pre-treatment cells, this depends on the \code{base_period} setting. +For RC/unbalanced panel: uses per-observation weights from both +periods.} +\item{\code{"varying"}}{Uses per-observation, period-specific weights +for all panel types. For balanced panel data, this switches to the +repeated cross-section DRDID estimators so that pre-period and +post-period observations each carry their own weight. This is the +most flexible option but sacrifices the efficiency of the panel +estimator. For RC/unbalanced panel, this is identical to the +default.} +\item{\code{"base_period"}}{Fixes weights at the base period (g-1) for +all (g,t) cells within a group, for both pre-treatment and +post-treatment comparisons. Ensures all cells within a group use the +same weights. For RC/unbalanced panel, units not observed in the base +period are dropped with a warning.} +\item{\code{"first_period"}}{Fixes weights at the first time period in +the dataset for all (g,t) cells. For RC/unbalanced panel, units not +observed in the first period are dropped with a warning.} +}} \item{alp}{the significance level, default is 0.05} diff --git a/man/att_gt.Rd b/man/att_gt.Rd index ac311920..9d7cda9c 100644 --- a/man/att_gt.Rd +++ b/man/att_gt.Rd @@ -16,6 +16,7 @@ att_gt( control_group = c("nevertreated", "notyettreated"), anticipation = 0, weightsname = NULL, + fix_weights = NULL, alp = 0.05, bstrap = TRUE, cband = TRUE, @@ -96,7 +97,51 @@ in the treatment where units can anticipate participating in the treatment and therefore it can affect their untreated potential outcomes} \item{weightsname}{The name of the column containing the sampling weights. -If not set, all observations have same weight.} +If not set, all observations have same weight. When weights are +time-invariant (constant within each unit across periods), all +\code{fix_weights} options produce identical results and no special +handling is needed. 
+ +When weights vary across time (e.g., time-varying population sizes), +the default behavior differs by panel type: +\describe{ +\item{Balanced panel}{Each 2x2 DiD comparison uses the weight from the +earlier of the two time periods involved. For post-treatment cells, +this is the base period (g-1). For pre-treatment cells with +\code{base_period="varying"}, this is the pre-treatment period itself. +The panel DRDID estimators are used.} +\item{Repeated cross sections and unbalanced panels}{Both periods' +per-observation weights are passed directly to the RC DRDID estimators, +so each observation carries its own period-specific weight.} +} +Use the \code{fix_weights} argument to override the default behavior.} + +\item{fix_weights}{Controls how time-varying sampling weights are resolved. +Only relevant when weights vary across time; with time-invariant weights, +all options produce identical results. Options: +\describe{ +\item{\code{NULL} (default)}{For balanced panel: uses the weight from +the earlier of the two time periods in each 2x2 comparison. For +post-treatment cells, this is the base period (g-1). For +pre-treatment cells, this depends on the \code{base_period} setting. +For RC/unbalanced panel: uses per-observation weights from both +periods.} +\item{\code{"varying"}}{Uses per-observation, period-specific weights +for all panel types. For balanced panel data, this switches to the +repeated cross-section DRDID estimators so that pre-period and +post-period observations each carry their own weight. This is the +most flexible option but sacrifices the efficiency of the panel +estimator. For RC/unbalanced panel, this is identical to the +default.} +\item{\code{"base_period"}}{Fixes weights at the base period (g-1) for +all (g,t) cells within a group, for both pre-treatment and +post-treatment comparisons. Ensures all cells within a group use the +same weights. 
For RC/unbalanced panel, units not observed in the base +period are dropped with a warning.} +\item{\code{"first_period"}}{Fixes weights at the first time period in +the dataset for all (g,t) cells. For RC/unbalanced panel, units not +observed in the first period are dropped with a warning.} +}} \item{alp}{the significance level, default is 0.05} diff --git a/man/conditional_did_pretest.Rd b/man/conditional_did_pretest.Rd index 2a8be54b..3646a6b3 100644 --- a/man/conditional_did_pretest.Rd +++ b/man/conditional_did_pretest.Rd @@ -88,7 +88,24 @@ eventually participate in the treatment, but have not participated yet.} \item{weightsname}{The name of the column containing the sampling weights. -If not set, all observations have same weight.} +If not set, all observations have same weight. When weights are +time-invariant (constant within each unit across periods), all +\code{fix_weights} options produce identical results and no special +handling is needed. + +When weights vary across time (e.g., time-varying population sizes), +the default behavior differs by panel type: +\describe{ +\item{Balanced panel}{Each 2x2 DiD comparison uses the weight from the +earlier of the two time periods involved. For post-treatment cells, +this is the base period (g-1). For pre-treatment cells with +\code{base_period="varying"}, this is the pre-treatment period itself. 
+The panel DRDID estimators are used.} +\item{Repeated cross sections and unbalanced panels}{Both periods' +per-observation weights are passed directly to the RC DRDID estimators, +so each observation carries its own period-specific weight.} +} +Use the \code{fix_weights} argument (see \code{\link{att_gt}}) to override the default behavior.} \item{alp}{the significance level, default is 0.05} diff --git a/man/pre_process_did.Rd b/man/pre_process_did.Rd index 61c27dff..2cf4d735 100644 --- a/man/pre_process_did.Rd +++ b/man/pre_process_did.Rd @@ -16,6 +16,7 @@ pre_process_did( control_group = c("nevertreated", "notyettreated"), anticipation = 0, weightsname = NULL, + fix_weights = NULL, alp = 0.05, bstrap = FALSE, cband = FALSE, @@ -96,7 +97,51 @@ in the treatment where units can anticipate participating in the treatment and therefore it can affect their untreated potential outcomes} \item{weightsname}{The name of the column containing the sampling weights. -If not set, all observations have same weight.} +If not set, all observations have same weight. When weights are +time-invariant (constant within each unit across periods), all +\code{fix_weights} options produce identical results and no special +handling is needed. + +When weights vary across time (e.g., time-varying population sizes), +the default behavior differs by panel type: +\describe{ +\item{Balanced panel}{Each 2x2 DiD comparison uses the weight from the +earlier of the two time periods involved. For post-treatment cells, +this is the base period (g-1). For pre-treatment cells with +\code{base_period="varying"}, this is the pre-treatment period itself. 
+The panel DRDID estimators are used.} +\item{Repeated cross sections and unbalanced panels}{Both periods' +per-observation weights are passed directly to the RC DRDID estimators, +so each observation carries its own period-specific weight.} +} +Use the \code{fix_weights} argument to override the default behavior.} + +\item{fix_weights}{Controls how time-varying sampling weights are resolved. +Only relevant when weights vary across time; with time-invariant weights, +all options produce identical results. Options: +\describe{ +\item{\code{NULL} (default)}{For balanced panel: uses the weight from +the earlier of the two time periods in each 2x2 comparison. For +post-treatment cells, this is the base period (g-1). For +pre-treatment cells, this depends on the \code{base_period} setting. +For RC/unbalanced panel: uses per-observation weights from both +periods.} +\item{\code{"varying"}}{Uses per-observation, period-specific weights +for all panel types. For balanced panel data, this switches to the +repeated cross-section DRDID estimators so that pre-period and +post-period observations each carry their own weight. This is the +most flexible option but sacrifices the efficiency of the panel +estimator. For RC/unbalanced panel, this is identical to the +default.} +\item{\code{"base_period"}}{Fixes weights at the base period (g-1) for +all (g,t) cells within a group, for both pre-treatment and +post-treatment comparisons. Ensures all cells within a group use the +same weights. For RC/unbalanced panel, units not observed in the base +period are dropped with a warning.} +\item{\code{"first_period"}}{Fixes weights at the first time period in +the dataset for all (g,t) cells. 
For RC/unbalanced panel, units not +observed in the first period are dropped with a warning.} +}} \item{alp}{the significance level, default is 0.05} diff --git a/man/pre_process_did2.Rd b/man/pre_process_did2.Rd index b7b90d3a..6106e7bc 100644 --- a/man/pre_process_did2.Rd +++ b/man/pre_process_did2.Rd @@ -16,6 +16,7 @@ pre_process_did2( control_group = c("nevertreated", "notyettreated"), anticipation = 0, weightsname = NULL, + fix_weights = NULL, alp = 0.05, bstrap = FALSE, cband = FALSE, @@ -96,7 +97,51 @@ in the treatment where units can anticipate participating in the treatment and therefore it can affect their untreated potential outcomes} \item{weightsname}{The name of the column containing the sampling weights. -If not set, all observations have same weight.} +If not set, all observations have same weight. When weights are +time-invariant (constant within each unit across periods), all +\code{fix_weights} options produce identical results and no special +handling is needed. + +When weights vary across time (e.g., time-varying population sizes), +the default behavior differs by panel type: +\describe{ +\item{Balanced panel}{Each 2x2 DiD comparison uses the weight from the +earlier of the two time periods involved. For post-treatment cells, +this is the base period (g-1). For pre-treatment cells with +\code{base_period="varying"}, this is the pre-treatment period itself. +The panel DRDID estimators are used.} +\item{Repeated cross sections and unbalanced panels}{Both periods' +per-observation weights are passed directly to the RC DRDID estimators, +so each observation carries its own period-specific weight.} +} +Use the \code{fix_weights} argument to override the default behavior.} + +\item{fix_weights}{Controls how time-varying sampling weights are resolved. +Only relevant when weights vary across time; with time-invariant weights, +all options produce identical results. 
Options: +\describe{ +\item{\code{NULL} (default)}{For balanced panel: uses the weight from +the earlier of the two time periods in each 2x2 comparison. For +post-treatment cells, this is the base period (g-1). For +pre-treatment cells, this depends on the \code{base_period} setting. +For RC/unbalanced panel: uses per-observation weights from both +periods.} +\item{\code{"varying"}}{Uses per-observation, period-specific weights +for all panel types. For balanced panel data, this switches to the +repeated cross-section DRDID estimators so that pre-period and +post-period observations each carry their own weight. This is the +most flexible option but sacrifices the efficiency of the panel +estimator. For RC/unbalanced panel, this is identical to the +default.} +\item{\code{"base_period"}}{Fixes weights at the base period (g-1) for +all (g,t) cells within a group, for both pre-treatment and +post-treatment comparisons. Ensures all cells within a group use the +same weights. For RC/unbalanced panel, units not observed in the base +period are dropped with a warning.} +\item{\code{"first_period"}}{Fixes weights at the first time period in +the dataset for all (g,t) cells. 
For RC/unbalanced panel, units not +observed in the first period are dropped with a warning.} +}} \item{alp}{the significance level, default is 0.05} diff --git a/tests/testthat/test-att_gt.R b/tests/testthat/test-att_gt.R index 5763d4fe..b39f6921 100644 --- a/tests/testthat/test-att_gt.R +++ b/tests/testthat/test-att_gt.R @@ -628,6 +628,183 @@ test_that("sampling weights", { }) +# ============================================================================= +# Column naming: user columns named gname/tname/idname should not crash +# ============================================================================= + +test_that("works when user column is literally named 'gname'", { + set.seed(20260401) + sp <- did::reset.sim() + data <- did::build_sim_dataset(sp) + # Rename columns to match parameter names exactly + names(data)[names(data) == "G"] <- "gname" + names(data)[names(data) == "period"] <- "tname" + names(data)[names(data) == "id"] <- "idname" + + mod <- att_gt(yname="Y", xformla=~X, data=data, tname="tname", idname="idname", + gname="gname", est_method="reg", bstrap=FALSE) + expect_false(all(is.na(mod$att))) # passes as soon as one group-time ATT is non-NA + + # aggte should also work (this was the specific dreamerr bug) + agg <- aggte(mod, type="simple") + expect_false(is.na(agg$overall.att)) + + agg_dyn <- aggte(mod, type="dynamic") + expect_false(is.na(agg_dyn$overall.att)) +}) + +test_that("works when user column is literally named 'gname' with faster_mode", { + set.seed(20260401) + sp <- did::reset.sim() + data <- did::build_sim_dataset(sp) + names(data)[names(data) == "G"] <- "gname" # same renames as the test above, so columns share att_gt's argument names + names(data)[names(data) == "period"] <- "tname" + names(data)[names(data) == "id"] <- "idname" + + mod <- att_gt(yname="Y", xformla=~X, data=data, tname="tname", idname="idname", + gname="gname", est_method="reg", bstrap=FALSE, faster_mode=TRUE) + expect_false(all(is.na(mod$att))) + + agg <- aggte(mod, type="simple") # only the simple aggregation is checked in faster_mode + expect_false(is.na(agg$overall.att)) +}) + +# 
============================================================================= +# Time-varying weights: fix_weights tests +# ============================================================================= + +test_that("time-varying weights: faster_mode matches slow mode (default fix_weights=NULL)", { + set.seed(20260401) + sp <- did::reset.sim() + data <- did::build_sim_dataset(sp) + data$tv_weight <- data$period + runif(nrow(data), -0.1, 0.1) # period value plus noise in [-0.1, 0.1], drawn per row + + for (em in c("reg", "dr", "ipw")) { # every estimation method is crossed with every base_period setting + for (bp in c("varying", "universal")) { + res_slow <- att_gt(yname="Y", xformla=~X, data=data, tname="period", idname="id", + gname="G", est_method=em, weightsname="tv_weight", + base_period=bp, faster_mode=FALSE, bstrap=FALSE) + res_fast <- att_gt(yname="Y", xformla=~X, data=data, tname="period", idname="id", + gname="G", est_method=em, weightsname="tv_weight", + base_period=bp, faster_mode=TRUE, bstrap=FALSE) + + expect_equal(res_slow$att, res_fast$att, tolerance=1e-10, + label=paste("ATT match:", em, bp)) + } + } +}) + +test_that("fix_weights options: faster_mode matches slow mode (balanced panel)", { + set.seed(20260401) + sp <- did::reset.sim() + data <- did::build_sim_dataset(sp) + data$tv_weight <- data$period + runif(nrow(data), -0.1, 0.1) + + for (fw in c("varying", "base_period", "first_period")) { # the NULL default is exercised by the test above + res_slow <- att_gt(yname="Y", xformla=~X, data=data, tname="period", idname="id", + gname="G", est_method="dr", weightsname="tv_weight", + fix_weights=fw, faster_mode=FALSE, bstrap=FALSE) + res_fast <- att_gt(yname="Y", xformla=~X, data=data, tname="period", idname="id", + gname="G", est_method="dr", weightsname="tv_weight", + fix_weights=fw, faster_mode=TRUE, bstrap=FALSE) + + expect_equal(res_slow$att, res_fast$att, tolerance=1e-10, + label=paste("ATT match:", fw)) + } +}) + +test_that("time-invariant weights: all fix_weights options produce identical ATTs", { + set.seed(20260401) + sp <- did::reset.sim() + data <- did::build_sim_dataset(sp) + n_ids <- length(unique(data$id)) + 
n_periods <- length(unique(data$period)) + data$const_weight <- rep(runif(n_ids, 1, 10), each = n_periods) # one draw per id, repeated n_periods times; assumes rows are grouped by id with every id present in all periods - TODO confirm + + res_default <- att_gt(yname="Y", xformla=~X, data=data, tname="period", idname="id", + gname="G", est_method="reg", weightsname="const_weight", + bstrap=FALSE) + + for (fw in c("base_period", "first_period")) { # NOTE(review): "varying" is not in this loop although the test title says "all" - confirm this is intentional + res_fw <- att_gt(yname="Y", xformla=~X, data=data, tname="period", idname="id", + gname="G", est_method="reg", weightsname="const_weight", + fix_weights=fw, bstrap=FALSE) + expect_equal(res_default$att, res_fw$att, tolerance=1e-10, + label=paste("same ATT for", fw)) + } +}) + +test_that("message emitted for time-varying weights in balanced panel", { + set.seed(20260401) + sp <- did::reset.sim() + data <- did::build_sim_dataset(sp) + data$tv_weight <- data$period * 1.0 + runif(nrow(data), 0, 0.5) + + expect_message( + att_gt(yname="Y", xformla=~X, data=data, tname="period", idname="id", + gname="G", weightsname="tv_weight", bstrap=FALSE), + "Time-varying weights detected" + ) +}) + +test_that("no message for time-invariant weights", { + set.seed(20260401) + sp <- did::reset.sim() + data <- did::build_sim_dataset(sp) + n_ids <- length(unique(data$id)) + n_periods <- length(unique(data$period)) + data$const_weight <- rep(runif(n_ids, 1, 10), each = n_periods) # same per-id constant weight construction as in the test above + + expect_no_message( # no pattern is supplied, so any message raised by att_gt fails this test + att_gt(yname="Y", xformla=~X, data=data, tname="period", idname="id", + gname="G", weightsname="const_weight", bstrap=FALSE) + ) +}) + +test_that("notyettreated with time-varying weights: faster_mode matches", { + set.seed(20260401) + sp <- did::reset.sim() + data <- did::build_sim_dataset(sp) + data$tv_weight <- data$period + runif(nrow(data), 0, 0.5) + + res_slow <- att_gt(yname="Y", xformla=~X, data=data, tname="period", idname="id", + gname="G", est_method="dr", weightsname="tv_weight", + control_group="notyettreated", faster_mode=FALSE, bstrap=FALSE) + res_fast <- att_gt(yname="Y", xformla=~X, data=data, tname="period", idname="id", + gname="G", est_method="dr", 
weightsname="tv_weight", + control_group="notyettreated", faster_mode=TRUE, bstrap=FALSE) + + expect_equal(res_slow$att, res_fast$att, tolerance=1e-10) +}) + +test_that("RC with time-varying weights: faster_mode matches", { + set.seed(20260401) + sp <- did::reset.sim() + data <- did::build_sim_dataset(sp) # simulated with the default panel layout; att_gt is told panel=FALSE below + data$tv_weight <- data$period * 1.0 + runif(nrow(data), 0, 0.5) + + res_slow <- att_gt(yname="Y", data=data, tname="period", idname="id", + gname="G", est_method="reg", weightsname="tv_weight", + panel=FALSE, faster_mode=FALSE, bstrap=FALSE) + res_fast <- att_gt(yname="Y", data=data, tname="period", idname="id", + gname="G", est_method="reg", weightsname="tv_weight", + panel=FALSE, faster_mode=TRUE, bstrap=FALSE) + + expect_equal(res_slow$att, res_fast$att, tolerance=1e-10) +}) + +test_that("fix_weights validation", { + set.seed(20260401) + sp <- did::reset.sim() + data <- did::build_sim_dataset(sp) + + expect_error( + att_gt(yname="Y", data=data, tname="period", idname="id", + gname="G", fix_weights="invalid_option", bstrap=FALSE), + "fix_weights must be NULL" + ) +}) + test_that("clustered standard errors", { set.seed(09142024) # check that we can compute when clustered standard errors are supplied diff --git a/tests/testthat/test-inference.R b/tests/testthat/test-inference.R index 6a0450e4..b6c6863e 100644 --- a/tests/testthat/test-inference.R +++ b/tests/testthat/test-inference.R @@ -30,13 +30,22 @@ same_matrix_elem <- function(A, B) { temp_lib <- tempfile() dir.create(temp_lib) -remotes::install_version("did", version = "2.1.2", lib = temp_lib, repos = "http://cran.us.r-project.org") +old_did_available <- tryCatch({ + remotes::install_version("did", version = "2.1.2", lib = temp_lib, repos = "https://cran.us.r-project.org", quiet = TRUE) + dir.exists(file.path(temp_lib, "did")) # report success only if the package directory was actually created in temp_lib +}, error = function(e) FALSE) + +if (!old_did_available) { + # Remove the unused temporary library; each test below skips itself via skip_if() + unlink(temp_lib, recursive = TRUE) +} # install.packages( # 
"https://cran.r-project.org/src/contrib/did_2.1.2.tar.gz", # repos = NULL, type = "source", lib = temp_lib # ) test_that("inference with balanced panel data and aggregations", { + skip_if(!old_did_available, "did v2.1.2 not available from CRAN") sp <- did::reset.sim() data <- did::build_sim_dataset(sp) @@ -170,6 +179,7 @@ test_that("inference with balanced panel data and aggregations", { test_that("inference with clustering", { + skip_if(!old_did_available, "did v2.1.2 not available from CRAN") sp <- did::reset.sim() data <- did::build_sim_dataset(sp) @@ -298,6 +308,7 @@ test_that("inference with clustering", { }) test_that("same inference with unbalanced panel and panel data", { + skip_if(!old_did_available, "did v2.1.2 not available from CRAN") sp <- did::reset.sim() data <- did::build_sim_dataset(sp) @@ -328,6 +339,7 @@ test_that("same inference with unbalanced panel and panel data", { test_that("inference with repeated cross sections", { + skip_if(!old_did_available, "did v2.1.2 not available from CRAN") sp <- did::reset.sim() data <- did::build_sim_dataset(sp, panel = FALSE) @@ -457,6 +469,7 @@ test_that("inference with repeated cross sections", { test_that("inference with repeated cross sections and clustering", { + skip_if(!old_did_available, "did v2.1.2 not available from CRAN") sp <- did::reset.sim() data <- did::build_sim_dataset(sp, panel = FALSE) @@ -586,6 +599,7 @@ test_that("inference with repeated cross sections and clustering", { test_that("inference with unbalanced panel", { + skip_if(!old_did_available, "did v2.1.2 not available from CRAN") sp <- did::reset.sim() data <- did::build_sim_dataset(sp) data <- data[-3, ] @@ -719,6 +733,7 @@ test_that("inference with unbalanced panel", { }) test_that("inference with unbalanced panel and clustering", { + skip_if(!old_did_available, "did v2.1.2 not available from CRAN") sp <- did::reset.sim() data <- did::build_sim_dataset(sp) data <- data[-3, ]