marcelortizv · marcelortizv · Aug 11, 2025 · Aug 8, 2025 · Aug 9, 2025
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: triplediff
 Title: Triple-Difference Estimators
-Version: 0.1.0
+Version: 0.1.1
 Authors@R: c(person("Marcelo", "Ortiz-Villavicencio", email = "marcelo.ortiz@emory.edu", role = c("aut", "cre")),
               person("Pedro H. C.", "Sant'Anna", email = "pedro.santanna@emory.edu", role = c("aut"))
             )

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,7 @@
 # triplediff 0.1.0
 
   * Initial release of triplediff in alpha stage, functions for computing group-time average treatment effects in DDD and combining them into a smaller number of parameters are available.
+
+# triplediff 0.1.1
+
+  * Bug fix in the `cluster` parameter. When the user specifies a cluster variable, the function now correctly uses it to cluster standard errors via the multiplier bootstrap.
diff --git a/R/att_dr.R b/R/att_dr.R
@@ -32,7 +32,6 @@ att_dr <- function(did_preprocessed) {
   boot <- did_preprocessed$boot
   nboot <- did_preprocessed$nboot
   alpha <- did_preprocessed$alpha
-  cband <- did_preprocessed$cband
   use_parallel <- did_preprocessed$use_parallel # to perform bootstrap
   cores <- did_preprocessed$cores # to perform bootstrap
   cband <- did_preprocessed$cband # to perform bootstrap + simult. conf. band
@@ -97,7 +96,7 @@ att_dr <- function(did_preprocessed) {
       # get critical value to compute uniform confidence bands
       cv <- boot_result$unif_crit_val
       if(cv >= 7){
-        warning("Simultaneous critical value is arguably `too large' to be realible. This usually happens when number of observations per group is small and/or there is no much variation in outcomes.")
+        warning("Simultaneous critical value is arguably `too large' to be reliable. This usually happens when number of observations per group is small and/or there is no much variation in outcomes.")
       }
 
     } else {

diff --git a/R/ddd.R b/R/ddd.R
@@ -63,7 +63,14 @@ NULL
 #'
 #' summary(att_22)
 #'
+#' # Computing clustered standard errors with the multiplier bootstrap
 #'
+#' att_cluster <-  ddd(yname = "y", tname = "time", idname = "id", gname = "state",
+#' pname = "partition", xformla = ~cov1 + cov2 + cov3 + cov4,
+#' data = df, control_group = "nevertreated",
+#' base_period = "universal", est_method = "dr", cluster = "cluster")
+#'
+#' summary(att_cluster)
 #'
 #' #----------------------------------------------------------
 #' # Triple Diff with multiple time periods
@@ -297,7 +304,7 @@ ddd <- function(yname,
     multiple_periods = multiple_periods,
     # learners = args$learners,
     # n_folds = args$n_folds,
-    cband = args$cband,
+    cband = dp$cband, # getting from dp because it could change in the pre process
     cluster = args$cluster,
     boot = dp$boot, # getting from dp because it could change in the pre process
     alpha = dp$alpha, # getting from dp because it could change in the pre process

diff --git a/R/preprocess.R b/R/preprocess.R
@@ -60,6 +60,12 @@ run_nopreprocess_2periods <- function(yname,
       nboot <- 999
       args$nboot <- nboot
     }
+
+    if(!cband){
+      warning("cband = FALSE. Setting cband = TRUE for bootstrapped standard errors.")
+      cband <- TRUE
+      args$cband <- cband
+    }
   }
 
   # Flags for cluster variable
@@ -78,6 +84,15 @@ run_nopreprocess_2periods <- function(yname,
     if (length(cluster) > 1) {
       stop("You can only provide 1 cluster variable additionally to the one provided in idname. Please check your arguments")
     }
+
+    # check if bootstrap is on
+    if (!boot){
+      warning("Clustered SEs are only available when boot=TRUE. Setting boot=TRUE and cband=TRUE for bootstrapped standard errors.")
+      boot <- TRUE
+      args$boot <- boot
+      cband <- TRUE
+      args$cband <- cband
+    }
   }
 
 
@@ -210,15 +225,6 @@ run_preprocess_2Periods <- function(yname,
   # Error checking
   #-------------------------------------
 
-  # Flag for parallel and cores
-  if (boot){
-    if ((use_parallel) && (is.null(cores))) {
-      warning("Parallel processing is enabled but the number of cores is not specified. Using 1 core as default.")
-      cores <- 1
-      args$cores <- cores
-    }
-  }
-
   # Flag for alpha > 0.10
   if (alpha > 0.10) {
     warning("alpha = ", alpha, " is too high. Using alpha = 0.05 as default.")
@@ -234,12 +240,18 @@ run_preprocess_2Periods <- function(yname,
   # }
 
   # setting default bootstrap reps
-  if (boot == TRUE){
+  if (boot){
     if (is.null(nboot)){
       warning("Number of bootstrap samples not specified. Defaulting to 999 reps.")
       nboot <- 999
       args$nboot <- nboot
     }
+
+    if(!cband){
+      warning("cband = FALSE. Setting cband=TRUE for bootstrapped standard errors.")
+      cband <- TRUE
+      args$cband <- cband
+    }
   }
 
   # Run argument checks
@@ -265,12 +277,37 @@ run_preprocess_2Periods <- function(yname,
     # Check that cluster variables do not vary over time within each unit
     if (length(cluster) > 0) {
       # Efficiently check for time-varying cluster variables
-      clust_tv <- dta[, lapply(.SD, function(col) length(unique(col)) == 1), by = id, .SDcols = cluster]
+      clust_tv <- dta[, lapply(.SD, function(col) length(unique(col)) == 1), by = idname, .SDcols = cluster]
       # If any cluster variable varies over time within any unit, stop execution
       if (!all(unlist(clust_tv[, -1, with = FALSE]))) {
         stop("triplediff cannot handle time-varying cluster variables at the moment. Please check your cluster variable.")
       }
     }
+
+    # check if bootstrap is on
+    if (!boot){
+      warning("Clustered SEs are only available when boot=TRUE. Setting boot=TRUE and cband=TRUE for bootstrapped standard errors.")
+      boot <- TRUE
+      args$boot <- boot
+      cband <- TRUE
+      args$cband <- cband
+
+      # adding boot reps too
+      if (is.null(nboot)){
+        warning("Number of bootstrap samples not specified. Defaulting to 999 reps.")
+        nboot <- 999
+        args$nboot <- nboot
+      }
+    }
+  }
+
+  # Flag for parallel and cores
+  if (boot){
+    if ((use_parallel) && (is.null(cores))) {
+      warning("Parallel processing is enabled but the number of cores is not specified. Using 1 core as default.")
+      cores <- 1
+      args$cores <- cores
+    }
   }
 
   # set weights
@@ -457,15 +494,6 @@ run_preprocess_multPeriods <- function(yname,
   # Error checking
   #-------------------------------------
 
-  # Flag for parallel and cores
-  if (boot){
-    if ((use_parallel) && (is.null(cores))) {
-      warning("Parallel processing is enabled but the number of cores is not specified. Using 1 core.")
-      cores <- 1
-      args$cores <- cores
-    }
-  }
-
   # Flag for alpha > 0.10
   if (alpha > 0.10) {
     warning("alpha = ", alpha, " is too high. Using alpha = 0.05 as default.")
@@ -481,12 +509,18 @@ run_preprocess_multPeriods <- function(yname,
   # }
 
   # setting default bootstrap reps
-  if (boot == TRUE){
+  if (boot){
     if (is.null(nboot)){
       warning("Number of bootstrap samples not specified. Defaulting to 999 reps.")
       nboot <- 999
       args$nboot <- nboot
     }
+
+    if(!cband){
+      warning("cband = FALSE. Setting cband=TRUE for bootstrapped standard errors.")
+      cband <- TRUE
+      args$cband <- cband
+    }
   }
 
   # Run argument checks
@@ -512,12 +546,37 @@ run_preprocess_multPeriods <- function(yname,
     # Check that cluster variables do not vary over time within each unit
     if (length(cluster) > 0) {
       # Efficiently check for time-varying cluster variables
-      clust_tv <- dta[, lapply(.SD, function(col) length(unique(col)) == 1), by = id, .SDcols = cluster]
+      clust_tv <- dta[, lapply(.SD, function(col) length(unique(col)) == 1), by = idname, .SDcols = cluster]
       # If any cluster variable varies over time within any unit, stop execution
       if (!all(unlist(clust_tv[, -1, with = FALSE]))) {
         stop("triplediff cannot handle time-varying cluster variables at the moment. Please check your cluster variable.")
       }
     }
+
+    # check if bootstrap is on
+    if (!boot){
+      warning("Clustered SEs are only available when boot=TRUE. Setting boot=TRUE and cband=TRUE for bootstrapped standard errors.")
+      boot <- TRUE
+      args$boot <- boot
+      cband <- TRUE
+      args$cband <- cband
+
+      # adding boot reps too
+      if (is.null(nboot)){
+        warning("Number of bootstrap samples not specified. Defaulting to 999 reps.")
+        nboot <- 999
+        args$nboot <- nboot
+      }
+    }
+  }
+
+  # Flag for parallel and cores
+  if (boot){
+    if ((use_parallel) && (is.null(cores))) {
+      warning("Parallel processing is enabled but the number of cores is not specified. Using 1 core.")
+      cores <- 1
+      args$cores <- cores
+    }
   }
 
   # set in-blank xformla if no covariates are provided

diff --git a/R/validation_check.R b/R/validation_check.R
@@ -19,11 +19,12 @@ validate_args_2Periods <- function(args, dta){
   nboot <- args$nboot
   inffunc <- args$inffunc
   cband <- args$cband
+  cluster <- args$cluster
 
   # flag for boot and cband
-  if ((!boot) && (cband)){
-    stop("cband is only available when boot = TRUE")
-  }
+  # if ((!boot) && (cband) && (!is.null(cluster))){
+  #   stop("Clustered SEs are only available when boot=TRUE. Please, double check your arguments.")
+  # }
 
   # Flag for yname
   if (!is.element(yname, base::colnames(dta))) {

diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
 # Triple Differences Estimators <img src="man/figures/triplediff-logo.png" align="right" alt="" width="155" />
 
 ![](https://img.shields.io/badge/release%20lifecycle-alpha-orange.svg)
-[![](https://img.shields.io/badge/devel%20version-0.1.0-blue.svg)](https://github.com/marcelortizv/triplediff)
+[![](https://img.shields.io/badge/devel%20version-0.1.1-blue.svg)](https://github.com/marcelortizv/triplediff)
 [![](https://img.shields.io/badge/doi-10.48550/arXiv.2505.09942-yellow.svg)](https://doi.org/10.48550/arXiv.2505.09942)
 
 <!-- README.md is generated from README.Rmd. Please edit that file -->

diff --git a/man/ddd.Rd b/man/ddd.Rd
diff --git a/tests/testthat/test-att_dr.R b/tests/testthat/test-att_dr.R
@@ -7,13 +7,13 @@ test_that("multiplication works", {
   # Performing tests
   # ------------------------------
 
-  ddd_analytical <- ddd(yname = "outcome", tname = "year", idname = "id", gname = "treat",
+  ddd_boostrap <- ddd(yname = "outcome", tname = "year", idname = "id", gname = "treat",
                  pname = "partition", xformla = ~x1 + x2,
                   data = test_panel, control_group = NULL, base_period = NULL, est_method = "dr",
-                  weightsname = NULL, boot = TRUE, nboot = 1000,
+                  weightsname = NULL, boot = TRUE, nboot = 1000, cband = TRUE,
                   inffunc = FALSE, skip_data_checks = FALSE)
 
-  ddd_boostrap <- ddd(yname = "outcome", tname = "year", idname = "id", gname = "treat",
+  ddd_analytical <- ddd(yname = "outcome", tname = "year", idname = "id", gname = "treat",
                   pname = "partition", xformla = ~x1 + x2,
                   data = test_panel, control_group = NULL, base_period = NULL, est_method = "dr",
                   weightsname = NULL, boot = FALSE, nboot = NULL,
@@ -25,3 +25,28 @@ test_that("multiplication works", {
   # Check that standard errors are comparable
   expect_equal(ddd_analytical$se, ddd_boostrap$se, tolerance = 0.5)
 })
+
+# Testing that clustered standard errors are working correctly
+test_that("clustered standard errors are working correctly", {
+  # generating dataset without errors
+  test_panel = gen_dgp_2periods(size = 5000, dgp_type = 1)$data
+
+  # ------------------------------
+  # Performing tests
+  # ------------------------------
+
+  att_nocluster <- ddd(yname = "y", tname = "time", idname = "id", gname = "state",
+                pname = "partition", xformla = ~cov1 + cov2 + cov3 + cov4, base_period = "universal",
+                data = test_panel, control_group = "nevertreated", est_method = "dr")
+
+  att_cluster <-  ddd(yname = "y", tname = "time", idname = "id", gname = "state",
+                      pname = "partition", xformla = ~cov1 + cov2 + cov3 + cov4,
+                      data = test_panel, control_group = "nevertreated", boot = TRUE, nboot = 1000, cband = TRUE,
+                      base_period = "universal", est_method = "dr", cluster = "cluster")
+
+  # Check that point estimates are the same
+  expect_equal(att_nocluster$ATT, att_cluster$ATT)
+
+  # Check that standard errors are different
+  expect_false(isTRUE(all.equal(att_nocluster$se, att_cluster$se)))
+})
diff --git a/tests/testthat/test-output_agg_ddd.R b/tests/testthat/test-output_agg_ddd.R
@@ -1,7 +1,7 @@
 # Testing if agg_ddd in generating output
 test_that("Testing generation of output in aggregation function", {
 
-  data <- gen_dgp_mult_periods(size = 10000, dgp_type = 1)[["data"]]
+  data <- gen_dgp_mult_periods(size = 1000, dgp_type = 1)[["data"]]
 
   # Performing simple tests
   out <- ddd(yname = "y", tname = "time", idname = "id",