diff --git a/DESCRIPTION b/DESCRIPTION index dd50567..01176ff 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: triplediff Title: Triple-Difference Estimators -Version: 0.1.0 +Version: 0.1.1 Authors@R: c(person("Marcelo", "Ortiz-Villavicencio", email = "marcelo.ortiz@emory.edu", role = c("aut", "cre")), person("Pedro H. C.", "Sant'Anna", email = "pedro.santanna@emory.edu", role = c("aut")) ) diff --git a/NEWS.md b/NEWS.md index f18d879..de6c6db 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ # triplediff 0.1.0 * Initial release of triplediff in alpha stage, functions for computing group-time average treatment effects in DDD and combining them into a smaller number of parameters are available. + +# triplediff 0.1.1 + + * Bug fix in the `cluster` parameter. When the user specifies a cluster variable, the function now correctly uses it to cluster standard errors when performing the multiplier bootstrap. diff --git a/R/att_dr.R b/R/att_dr.R index 5e9ad03..a36fb19 100755 --- a/R/att_dr.R +++ b/R/att_dr.R @@ -32,7 +32,6 @@ att_dr <- function(did_preprocessed) { boot <- did_preprocessed$boot nboot <- did_preprocessed$nboot alpha <- did_preprocessed$alpha - cband <- did_preprocessed$cband use_parallel <- did_preprocessed$use_parallel # to perform bootstrap cores <- did_preprocessed$cores # to perform bootstrap cband <- did_preprocessed$cband # to perform bootstrap + simult. conf. band @@ -97,7 +96,7 @@ att_dr <- function(did_preprocessed) { # get critical value to compute uniform confidence bands cv <- boot_result$unif_crit_val if(cv >= 7){ - warning("Simultaneous critical value is arguably `too large' to be realible. This usually happens when number of observations per group is small and/or there is no much variation in outcomes.") + warning("Simultaneous critical value is arguably `too large' to be reliable.
This usually happens when number of observations per group is small and/or there is no much variation in outcomes.") } } else { diff --git a/R/ddd.R b/R/ddd.R index 4fd1d90..959570e 100755 --- a/R/ddd.R +++ b/R/ddd.R @@ -63,7 +63,14 @@ NULL #' #' summary(att_22) #' +#' # Performing clustered standard errors with multiplier bootstrap #' +#' att_cluster <- ddd(yname = "y", tname = "time", idname = "id", gname = "state", +#' pname = "partition", xformla = ~cov1 + cov2 + cov3 + cov4, +#' data = df, control_group = "nevertreated", +#' base_period = "universal", est_method = "dr", cluster = "cluster") +#' +#' summary(att_cluster) #' #' #---------------------------------------------------------- #' # Triple Diff with multiple time periods @@ -297,7 +304,7 @@ ddd <- function(yname, multiple_periods = multiple_periods, # learners = args$learners, # n_folds = args$n_folds, - cband = args$cband, + cband = dp$cband, # getting from dp because it could change in the pre process cluster = args$cluster, boot = dp$boot, # getting from dp because it could change in the pre process alpha = dp$alpha, # getting from dp because it could change in the pre process diff --git a/R/preprocess.R b/R/preprocess.R index 4c72acc..5f90ff8 100755 --- a/R/preprocess.R +++ b/R/preprocess.R @@ -60,6 +60,12 @@ run_nopreprocess_2periods <- function(yname, nboot <- 999 args$nboot <- nboot } + + if(!cband){ + warning("cband = FALSE. Setting cband = TRUE for bootstrapped standard errors.") + cband <- TRUE + args$cband <- cband + } } # Flags for cluster variable @@ -78,6 +84,15 @@ run_nopreprocess_2periods <- function(yname, if (length(cluster) > 1) { stop("You can only provide 1 cluster variable additionally to the one provided in idname. Please check your arguments") } + + # check if bootstrap is on + if (!boot){ + warning("Clustered SEs are only available when boot=TRUE.
Setting boot=TRUE and cband=TRUE for bootstrapped standard errors.") + boot <- TRUE + args$boot <- boot + cband <- TRUE + args$cband <- cband + } } @@ -210,15 +225,6 @@ run_preprocess_2Periods <- function(yname, # Error checking #------------------------------------- - # Flag for parallel and cores - if (boot){ - if ((use_parallel) && (is.null(cores))) { - warning("Parallel processing is enabled but the number of cores is not specified. Using 1 core as default.") - cores <- 1 - args$cores <- cores - } - } - # Flag for alpha > 0.10 if (alpha > 0.10) { warning("alpha = ", alpha, " is too high. Using alpha = 0.05 as default.") @@ -234,12 +240,18 @@ run_preprocess_2Periods <- function(yname, # } # setting default bootstrap reps - if (boot == TRUE){ + if (boot){ if (is.null(nboot)){ warning("Number of bootstrap samples not specified. Defaulting to 999 reps.") nboot <- 999 args$nboot <- nboot } + + if(!cband){ + warning("cband = FALSE. Setting cband=TRUE for bootstrapped standard errors.") + cband <- TRUE + args$cband <- cband + } } # Run argument checks @@ -265,12 +277,37 @@ run_preprocess_2Periods <- function(yname, # Check that cluster variables do not vary over time within each unit if (length(cluster) > 0) { # Efficiently check for time-varying cluster variables - clust_tv <- dta[, lapply(.SD, function(col) length(unique(col)) == 1), by = id, .SDcols = cluster] + clust_tv <- dta[, lapply(.SD, function(col) length(unique(col)) == 1), by = idname, .SDcols = cluster] # If any cluster variable varies over time within any unit, stop execution if (!all(unlist(clust_tv[, -1, with = FALSE]))) { stop("triplediff cannot handle time-varying cluster variables at the moment. Please check your cluster variable.") } } + + # check if bootstrap is on + if (!boot){ + warning("Clustered SEs are only available when boot=TRUE. 
Setting boot=TRUE and cband=TRUE for bootstrapped standard errors.") + boot <- TRUE + args$boot <- boot + cband <- TRUE + args$cband <- cband + + # adding boot reps too + if (is.null(nboot)){ + warning("Number of bootstrap samples not specified. Defaulting to 999 reps.") + nboot <- 999 + args$nboot <- nboot + } + } + } + + # Flag for parallel and cores + if (boot){ + if ((use_parallel) && (is.null(cores))) { + warning("Parallel processing is enabled but the number of cores is not specified. Using 1 core as default.") + cores <- 1 + args$cores <- cores + } } # set weights @@ -457,15 +494,6 @@ run_preprocess_multPeriods <- function(yname, # Error checking #------------------------------------- - # Flag for parallel and cores - if (boot){ - if ((use_parallel) && (is.null(cores))) { - warning("Parallel processing is enabled but the number of cores is not specified. Using 1 core.") - cores <- 1 - args$cores <- cores - } - } - # Flag for alpha > 0.10 if (alpha > 0.10) { warning("alpha = ", alpha, " is too high. Using alpha = 0.05 as default.") @@ -481,12 +509,18 @@ run_preprocess_multPeriods <- function(yname, # } # setting default bootstrap reps - if (boot == TRUE){ + if (boot){ if (is.null(nboot)){ warning("Number of bootstrap samples not specified. Defaulting to 999 reps.") nboot <- 999 args$nboot <- nboot } + + if(!cband){ + warning("cband = FALSE. 
Setting cband=TRUE for bootstrapped standard errors.") + cband <- TRUE + args$cband <- cband + } } # Run argument checks @@ -512,12 +546,37 @@ run_preprocess_multPeriods <- function(yname, # Check that cluster variables do not vary over time within each unit if (length(cluster) > 0) { # Efficiently check for time-varying cluster variables - clust_tv <- dta[, lapply(.SD, function(col) length(unique(col)) == 1), by = id, .SDcols = cluster] + clust_tv <- dta[, lapply(.SD, function(col) length(unique(col)) == 1), by = idname, .SDcols = cluster] # If any cluster variable varies over time within any unit, stop execution if (!all(unlist(clust_tv[, -1, with = FALSE]))) { stop("triplediff cannot handle time-varying cluster variables at the moment. Please check your cluster variable.") } } + + # check if bootstrap is on + if (!boot){ + warning("Clustered SEs are only available when boot=TRUE. Setting boot=TRUE and cband=TRUE for bootstrapped standard errors.") + boot <- TRUE + args$boot <- boot + cband <- TRUE + args$cband <- cband + + # adding boot reps too + if (is.null(nboot)){ + warning("Number of bootstrap samples not specified. Defaulting to 999 reps.") + nboot <- 999 + args$nboot <- nboot + } + } + } + + # Flag for parallel and cores + if (boot){ + if ((use_parallel) && (is.null(cores))) { + warning("Parallel processing is enabled but the number of cores is not specified. 
Using 1 core.") + cores <- 1 + args$cores <- cores + } } # set in-blank xformla if no covariates are provided diff --git a/R/validation_check.R b/R/validation_check.R index d9d5c4a..f1978f5 100755 --- a/R/validation_check.R +++ b/R/validation_check.R @@ -19,11 +19,12 @@ validate_args_2Periods <- function(args, dta){ nboot <- args$nboot inffunc <- args$inffunc cband <- args$cband + cluster <- args$cluster # flag for boot and cband - if ((!boot) && (cband)){ - stop("cband is only available when boot = TRUE") - } + # if ((!boot) && (cband) && (!is.null(cluster))){ + # stop("Clustered SEs are only available when boot=TRUE. Please, double check your arguments.") + # } # Flag for yname if (!is.element(yname, base::colnames(dta))) { diff --git a/README.md b/README.md index 2483bc7..32d0008 100755 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ # Triple Differences Estimators ![](https://img.shields.io/badge/release%20lifecycle-alpha-orange.svg) -[![](https://img.shields.io/badge/devel%20version-0.1.0-blue.svg)](https://github.com/marcelortizv/triplediff) +[![](https://img.shields.io/badge/devel%20version-0.1.1-blue.svg)](https://github.com/marcelortizv/triplediff) [![](https://img.shields.io/badge/doi-10.48550/arXiv.2505.09942-yellow.svg)](https://doi.org/10.48550/arXiv.2505.09942) diff --git a/man/ddd.Rd b/man/ddd.Rd index 78e9de0..916a9fc 100755 --- a/man/ddd.Rd +++ b/man/ddd.Rd @@ -109,7 +109,14 @@ att_22 <- ddd(yname = "y", tname = "time", idname = "id", gname = "state", summary(att_22) +# Performing clustered standard errors with multiplier bootstrap +att_cluster <- ddd(yname = "y", tname = "time", idname = "id", gname = "state", +pname = "partition", xformla = ~cov1 + cov2 + cov3 + cov4, +data = df, control_group = "nevertreated", +base_period = "universal", est_method = "dr", cluster = "cluster") + +summary(att_cluster) #---------------------------------------------------------- # Triple Diff with multiple time periods diff --git a/tests/testthat/test-att_dr.R
b/tests/testthat/test-att_dr.R index 7cc16a5..514d75c 100755 --- a/tests/testthat/test-att_dr.R +++ b/tests/testthat/test-att_dr.R @@ -7,13 +7,13 @@ test_that("multiplication works", { # Performing tests # ------------------------------ - ddd_analytical <- ddd(yname = "outcome", tname = "year", idname = "id", gname = "treat", + ddd_boostrap <- ddd(yname = "outcome", tname = "year", idname = "id", gname = "treat", pname = "partition", xformla = ~x1 + x2, data = test_panel, control_group = NULL, base_period = NULL, est_method = "dr", - weightsname = NULL, boot = TRUE, nboot = 1000, + weightsname = NULL, boot = TRUE, nboot = 1000, cband = TRUE, inffunc = FALSE, skip_data_checks = FALSE) - ddd_boostrap <- ddd(yname = "outcome", tname = "year", idname = "id", gname = "treat", + ddd_analytical <- ddd(yname = "outcome", tname = "year", idname = "id", gname = "treat", pname = "partition", xformla = ~x1 + x2, data = test_panel, control_group = NULL, base_period = NULL, est_method = "dr", weightsname = NULL, boot = FALSE, nboot = NULL, @@ -25,3 +25,28 @@ test_that("multiplication works", { # Check that standard errors are comparable expect_equal(ddd_analytical$se, ddd_boostrap$se, tolerance = 0.5) }) + +# Testing clustered standard error is working correctly +test_that("clustered standard errors are working correctly", { + # generating dataset without errors + test_panel = gen_dgp_2periods(size = 5000, dgp_type = 1)$data + + # ------------------------------ + # Performing tests + # ------------------------------ + + att_nocluster <- ddd(yname = "y", tname = "time", idname = "id", gname = "state", + pname = "partition", xformla = ~cov1 + cov2 + cov3 + cov4, base_period = "universal", + data = test_panel, control_group = "nevertreated", est_method = "dr") + + att_cluster <- ddd(yname = "y", tname = "time", idname = "id", gname = "state", + pname = "partition", xformla = ~cov1 + cov2 + cov3 + cov4, + data = test_panel, control_group = "nevertreated", boot = TRUE, nboot = 1000, 
cband = TRUE, + base_period = "universal", est_method = "dr", cluster = "cluster") + + # Check that point estimates are the same + expect_equal(att_nocluster$ATT, att_cluster$ATT) + + # Check that standard errors are different + expect_false(isTRUE(all.equal(att_nocluster$se, att_cluster$se))) +}) diff --git a/tests/testthat/test-output_agg_ddd.R b/tests/testthat/test-output_agg_ddd.R index 3cb8c7d..833a850 100755 --- a/tests/testthat/test-output_agg_ddd.R +++ b/tests/testthat/test-output_agg_ddd.R @@ -1,7 +1,7 @@ # Testing if agg_ddd in generating output test_that("Testing generation of output in aggregation function", { - data <- gen_dgp_mult_periods(size = 10000, dgp_type = 1)[["data"]] + data <- gen_dgp_mult_periods(size = 1000, dgp_type = 1)[["data"]] # Performing simple tests out <- ddd(yname = "y", tname = "time", idname = "id",