Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: triplediff
Title: Triple-Difference Estimators
Version: 0.1.0
Version: 0.1.1
Authors@R: c(person("Marcelo", "Ortiz-Villavicencio", email = "marcelo.ortiz@emory.edu", role = c("aut", "cre")),
person("Pedro H. C.", "Sant'Anna", email = "pedro.santanna@emory.edu", role = c("aut"))
)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# triplediff 0.1.0

* Initial release of triplediff in alpha stage. Functions for computing group-time average treatment effects in DDD and combining them into a smaller number of parameters are available.

# triplediff 0.1.1

* Bug fix in the `cluster` parameter. When the user specifies a cluster variable, the function now correctly uses it to cluster standard errors when performing the multiplier bootstrap.
3 changes: 1 addition & 2 deletions R/att_dr.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ att_dr <- function(did_preprocessed) {
boot <- did_preprocessed$boot
nboot <- did_preprocessed$nboot
alpha <- did_preprocessed$alpha
cband <- did_preprocessed$cband
use_parallel <- did_preprocessed$use_parallel # to perform bootstrap
cores <- did_preprocessed$cores # to perform bootstrap
cband <- did_preprocessed$cband # to perform bootstrap + simult. conf. band
Expand Down Expand Up @@ -97,7 +96,7 @@ att_dr <- function(did_preprocessed) {
# get critical value to compute uniform confidence bands
cv <- boot_result$unif_crit_val
if(cv >= 7){
warning("Simultaneous critical value is arguably `too large' to be realible. This usually happens when number of observations per group is small and/or there is no much variation in outcomes.")
warning("Simultaneous critical value is arguably `too large' to be reliable. This usually happens when number of observations per group is small and/or there is no much variation in outcomes.")
}

} else {
Expand Down
9 changes: 8 additions & 1 deletion R/ddd.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,14 @@ NULL
#'
#' summary(att_22)
#'
#' # Computing clustered standard errors with multiplier bootstrap
#'
#' att_cluster <- ddd(yname = "y", tname = "time", idname = "id", gname = "state",
#' pname = "partition", xformla = ~cov1 + cov2 + cov3 + cov4,
#' data = df, control_group = "nevertreated",
#' base_period = "universal", est_method = "dr", cluster = "cluster")
#'
#' summary(att_cluster)
#'
#' #----------------------------------------------------------
#' # Triple Diff with multiple time periods
Expand Down Expand Up @@ -297,7 +304,7 @@ ddd <- function(yname,
multiple_periods = multiple_periods,
# learners = args$learners,
# n_folds = args$n_folds,
cband = args$cband,
cband = dp$cband, # getting from dp because it could change in the pre process
cluster = args$cluster,
boot = dp$boot, # getting from dp because it could change in the pre process
alpha = dp$alpha, # getting from dp because it could change in the pre process
Expand Down
103 changes: 81 additions & 22 deletions R/preprocess.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ run_nopreprocess_2periods <- function(yname,
nboot <- 999
args$nboot <- nboot
}

if(!cband){
warning("cband = FALSE. Setting cband = TRUE for bootstrapped standard errors.")
cband <- TRUE
args$cband <- cband
}
}

# Flags for cluster variable
Expand All @@ -78,6 +84,15 @@ run_nopreprocess_2periods <- function(yname,
if (length(cluster) > 1) {
stop("You can only provide 1 cluster variable additionally to the one provided in idname. Please check your arguments")
}

# check if bootstrap is on
if (!boot){
warning("Clustered SEs are only available when boot=TRUE. Setting boot=TRUE and cband=TRUE for bootstrapped standard errors.")
boot <- TRUE
args$boot <- boot
cband <- TRUE
args$cband <- cband
}
}


Expand Down Expand Up @@ -210,15 +225,6 @@ run_preprocess_2Periods <- function(yname,
# Error checking
#-------------------------------------

# Flag for parallel and cores
if (boot){
if ((use_parallel) && (is.null(cores))) {
warning("Parallel processing is enabled but the number of cores is not specified. Using 1 core as default.")
cores <- 1
args$cores <- cores
}
}

# Flag for alpha > 0.10
if (alpha > 0.10) {
warning("alpha = ", alpha, " is too high. Using alpha = 0.05 as default.")
Expand All @@ -234,12 +240,18 @@ run_preprocess_2Periods <- function(yname,
# }

# setting default bootstrap reps
if (boot == TRUE){
if (boot){
if (is.null(nboot)){
warning("Number of bootstrap samples not specified. Defaulting to 999 reps.")
nboot <- 999
args$nboot <- nboot
}

if(!cband){
warning("cband = FALSE. Setting cband=TRUE for bootstrapped standard errors.")
cband <- TRUE
args$cband <- cband
}
}

# Run argument checks
Expand All @@ -265,12 +277,37 @@ run_preprocess_2Periods <- function(yname,
# Check that cluster variables do not vary over time within each unit
if (length(cluster) > 0) {
# Efficiently check for time-varying cluster variables
clust_tv <- dta[, lapply(.SD, function(col) length(unique(col)) == 1), by = id, .SDcols = cluster]
clust_tv <- dta[, lapply(.SD, function(col) length(unique(col)) == 1), by = idname, .SDcols = cluster]
# If any cluster variable varies over time within any unit, stop execution
if (!all(unlist(clust_tv[, -1, with = FALSE]))) {
stop("triplediff cannot handle time-varying cluster variables at the moment. Please check your cluster variable.")
}
}

# check if bootstrap is on
if (!boot){
warning("Clustered SEs are only available when boot=TRUE. Setting boot=TRUE and cband=TRUE for bootstrapped standard errors.")
boot <- TRUE
args$boot <- boot
cband <- TRUE
args$cband <- cband

# adding boot reps too
if (is.null(nboot)){
warning("Number of bootstrap samples not specified. Defaulting to 999 reps.")
nboot <- 999
args$nboot <- nboot
}
}
}

# Flag for parallel and cores
if (boot){
if ((use_parallel) && (is.null(cores))) {
warning("Parallel processing is enabled but the number of cores is not specified. Using 1 core as default.")
cores <- 1
args$cores <- cores
}
}

# set weights
Expand Down Expand Up @@ -457,15 +494,6 @@ run_preprocess_multPeriods <- function(yname,
# Error checking
#-------------------------------------

# Flag for parallel and cores
if (boot){
if ((use_parallel) && (is.null(cores))) {
warning("Parallel processing is enabled but the number of cores is not specified. Using 1 core.")
cores <- 1
args$cores <- cores
}
}

# Flag for alpha > 0.10
if (alpha > 0.10) {
warning("alpha = ", alpha, " is too high. Using alpha = 0.05 as default.")
Expand All @@ -481,12 +509,18 @@ run_preprocess_multPeriods <- function(yname,
# }

# setting default bootstrap reps
if (boot == TRUE){
if (boot){
if (is.null(nboot)){
warning("Number of bootstrap samples not specified. Defaulting to 999 reps.")
nboot <- 999
args$nboot <- nboot
}

if(!cband){
warning("cband = FALSE. Setting cband=TRUE for bootstrapped standard errors.")
cband <- TRUE
args$cband <- cband
}
}

# Run argument checks
Expand All @@ -512,12 +546,37 @@ run_preprocess_multPeriods <- function(yname,
# Check that cluster variables do not vary over time within each unit
if (length(cluster) > 0) {
# Efficiently check for time-varying cluster variables
clust_tv <- dta[, lapply(.SD, function(col) length(unique(col)) == 1), by = id, .SDcols = cluster]
clust_tv <- dta[, lapply(.SD, function(col) length(unique(col)) == 1), by = idname, .SDcols = cluster]
# If any cluster variable varies over time within any unit, stop execution
if (!all(unlist(clust_tv[, -1, with = FALSE]))) {
stop("triplediff cannot handle time-varying cluster variables at the moment. Please check your cluster variable.")
}
}

# check if bootstrap is on
if (!boot){
warning("Clustered SEs are only available when boot=TRUE. Setting boot=TRUE and cband=TRUE for bootstrapped standard errors.")
boot <- TRUE
args$boot <- boot
cband <- TRUE
args$cband <- cband

# adding boot reps too
if (is.null(nboot)){
warning("Number of bootstrap samples not specified. Defaulting to 999 reps.")
nboot <- 999
args$nboot <- nboot
}
}
}

# Flag for parallel and cores
if (boot){
if ((use_parallel) && (is.null(cores))) {
warning("Parallel processing is enabled but the number of cores is not specified. Using 1 core.")
cores <- 1
args$cores <- cores
}
}

# set in-blank xformla if no covariates are provided
Expand Down
7 changes: 4 additions & 3 deletions R/validation_check.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,12 @@ validate_args_2Periods <- function(args, dta){
nboot <- args$nboot
inffunc <- args$inffunc
cband <- args$cband
cluster <- args$cluster

# flag for boot and cband
if ((!boot) && (cband)){
stop("cband is only available when boot = TRUE")
}
# if ((!boot) && (cband) && (!is.null(cluster))){
# stop("Clustered SEs are only available when boot=TRUE. Please, double check your arguments.")
# }

# Flag for yname
if (!is.element(yname, base::colnames(dta))) {
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Triple Differences Estimators <img src="man/figures/triplediff-logo.png" align="right" alt="" width="155" />

![](https://img.shields.io/badge/release%20lifecycle-alpha-orange.svg)
[![](https://img.shields.io/badge/devel%20version-0.1.0-blue.svg)](https://github.com/marcelortizv/triplediff)
[![](https://img.shields.io/badge/devel%20version-0.1.1-blue.svg)](https://github.com/marcelortizv/triplediff)
[![](https://img.shields.io/badge/doi-10.48550/arXiv.2505.09942-yellow.svg)](https://doi.org/10.48550/arXiv.2505.09942)

<!-- README.md is generated from README.Rmd. Please edit that file -->
Expand Down
7 changes: 7 additions & 0 deletions man/ddd.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 28 additions & 3 deletions tests/testthat/test-att_dr.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ test_that("multiplication works", {
# Performing tests
# ------------------------------

ddd_analytical <- ddd(yname = "outcome", tname = "year", idname = "id", gname = "treat",
ddd_boostrap <- ddd(yname = "outcome", tname = "year", idname = "id", gname = "treat",
pname = "partition", xformla = ~x1 + x2,
data = test_panel, control_group = NULL, base_period = NULL, est_method = "dr",
weightsname = NULL, boot = TRUE, nboot = 1000,
weightsname = NULL, boot = TRUE, nboot = 1000, cband = TRUE,
inffunc = FALSE, skip_data_checks = FALSE)

ddd_boostrap <- ddd(yname = "outcome", tname = "year", idname = "id", gname = "treat",
ddd_analytical <- ddd(yname = "outcome", tname = "year", idname = "id", gname = "treat",
pname = "partition", xformla = ~x1 + x2,
data = test_panel, control_group = NULL, base_period = NULL, est_method = "dr",
weightsname = NULL, boot = FALSE, nboot = NULL,
Expand All @@ -25,3 +25,28 @@ test_that("multiplication works", {
# Check that standard errors are comparable
expect_equal(ddd_analytical$se, ddd_boostrap$se, tolerance = 0.5)
})

# Testing clustered standard error is working correctly
test_that("clustered standard errors are working correctly", {
  # Simulate one panel (dgp_type = 1) and reuse it for both fits, so that any
  # difference between the two results comes from the inference settings only.
  test_panel <- gen_dgp_2periods(size = 5000, dgp_type = 1)$data

  # ------------------------------
  # Performing tests
  # ------------------------------

  # Baseline fit: no cluster variable is supplied.
  att_nocluster <- ddd(yname = "y", tname = "time", idname = "id", gname = "state",
                       pname = "partition", xformla = ~cov1 + cov2 + cov3 + cov4, base_period = "universal",
                       data = test_panel, control_group = "nevertreated", est_method = "dr")

  # Clustered fit: same specification, with bootstrap inference switched on and
  # the "cluster" column passed as the cluster variable.
  att_cluster <- ddd(yname = "y", tname = "time", idname = "id", gname = "state",
                     pname = "partition", xformla = ~cov1 + cov2 + cov3 + cov4,
                     data = test_panel, control_group = "nevertreated", boot = TRUE, nboot = 1000, cband = TRUE,
                     base_period = "universal", est_method = "dr", cluster = "cluster")

  # Check that point estimates are the same: both fits use identical data and
  # the same estimator, so only the standard errors are expected to change.
  expect_equal(att_nocluster$ATT, att_cluster$ATT)

  # Check that standard errors are different: all.equal() returns a character
  # description (not FALSE) on mismatch, hence the isTRUE() wrapper.
  expect_false(isTRUE(all.equal(att_nocluster$se, att_cluster$se)))
})
2 changes: 1 addition & 1 deletion tests/testthat/test-output_agg_ddd.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Testing if agg_ddd is generating output
test_that("Testing generation of output in aggregation function", {

data <- gen_dgp_mult_periods(size = 10000, dgp_type = 1)[["data"]]
data <- gen_dgp_mult_periods(size = 1000, dgp_type = 1)[["data"]]

# Performing simple tests
out <- ddd(yname = "y", tname = "time", idname = "id",
Expand Down