From 72e22e5c708ef87868a3fa90552fb3054329eb4d Mon Sep 17 00:00:00 2001 From: Benedikt Sommer Date: Fri, 8 May 2026 15:24:03 +0200 Subject: [PATCH 1/6] removing R and null.sim --- NAMESPACE | 2 - NEWS.md | 31 ++++---- R/estimate.default.R | 137 ++-------------------------------- inst/misc/estimate_sim.R | 155 +++++++++++++++++++++++++++++++++++++++ man/estimate.default.Rd | 32 ++------ 5 files changed, 184 insertions(+), 173 deletions(-) create mode 100644 inst/misc/estimate_sim.R diff --git a/NAMESPACE b/NAMESPACE index 598a34fc..caa21fa3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -155,7 +155,6 @@ S3method(estimate,MAR) S3method(estimate,array) S3method(estimate,data.frame) S3method(estimate,default) -S3method(estimate,estimate.sim) S3method(estimate,formula) S3method(estimate,glm) S3method(estimate,list) @@ -294,7 +293,6 @@ S3method(print,diagtest) S3method(print,effects) S3method(print,equivalence) S3method(print,estimate) -S3method(print,estimate.sim) S3method(print,fix) S3method(print,gkgamma) S3method(print,gof.lvmfit) diff --git a/NEWS.md b/NEWS.md index 4bd0f90f..1561ae42 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # lava 1.9.1 Development version + - `estimate.default`: removed `R` and `null.sim` arguments - Safe evaluation of rank in `wald_test` - adding CI Length to `summary.sim` output - fixing bug wrt `estimate.index` in `summary.sim` @@ -50,7 +51,7 @@ - `as.data.frame.sim`, `as.matrix.sim` - fixed issues with quasi* families and negative binomial regression (`MASS:glm.nb`) -# lava 1.7.3 +# lava 1.7.3 - `parameter.estimate` method to extract matrix with estimates, standard errors, and confidence limits from and estimate object (coefmat element) - pairwise difference with `'-'.estimate` and `pairwise.diff` @@ -68,13 +69,13 @@ - cluster.index now also works when not loading the package (directly calling lava::estimate) - `weibull.lvm` and `coxExponential.lvm` now uses default parametrizations similar to `rweibull`, `rexp`. `weibull.lvm` now has arguments - "intercept","sigma" that directly relates to the accelerated failure time formulation. + "intercept","sigma" that directly relates to the accelerated failure time formulation. - Packages `gof`, `lava.tobit` are removed from Suggested packages. - + # lava 1.7.1 - Fixed bug in variance estimates from `estimate` with clustered observations. - - Discrete uniform distributions can now be specified with `uniform.lvm(value=...)`. - + - Discrete uniform distributions can now be specified with `uniform.lvm(value=...)`. + # lava 1.7.0 - `cv` method moved to the 'targeted' package - New `IC` method that returns influence function of a model object. The `iid` @@ -83,7 +84,7 @@ function and not the sample-size scaled version returned by the `iid` method). - fixed bug where calls like `regression("y", value=function(x) x)` did not work. `merge.estimate` now works without IC element - + # lava 1.6.10 - Improved starting values for MLE optimization. - New simulation distributions: `multinomial.lvm`, `none.lvm`, `constant.lvm`, @@ -91,23 +92,23 @@ - `regression`, `regression.lvm`: the 'value' argument can now be a (non-linear) function specifying the functional relationship between outcomes and covariates (for simulation with the `sim` method). - - New `intervention` method for applying interventions on `lvm`-objects - - Progress updates are now done via the `progressr` library (enabled with + - New `intervention` method for applying interventions on `lvm`-objects + - Progress updates are now done via the `progressr` library (enabled with `progressr::handlers(global=TRUE)`). - Parallelization is now controlled via the future library. To enable multicore parallelization: `future::plan("multicore")`. - New `plot_region` function for adding confidence regions to plots. - + # lava 1.6.9 - - `idplot`: now accepts matrix or data.frame as 1st argument. + - `idplot`: now accepts matrix or data.frame as 1st argument. New argument: return.data. - - Unit tests updated - - Bug fixes: + - Unit tests updated + - Bug fixes: `cv`: rmse output fixed. - score: Fixed bug for linear Gaussian model with argument 'indiv=TRUE'. + score: Fixed bug for linear Gaussian model with argument 'indiv=TRUE'. estimate.formula: call object initialized correctly. `plot.lvm`: 'noplot' argument now works with all plot engines. - + # lava 1.6.8.1 - Maintenance release - `confpred`: split-conformal prediction method updated @@ -319,7 +320,7 @@ added. - 'sim': parameters can now be specified as part of '...' - summary.sim: calculate Wald CI if confint=TRUE, otherwise use the - user supplied confidence limits. + user supplied confidence limits. - Clopper-pearson intervals and exact binomial tests added to 'diagtest'. - Interval censoring with 'normal' estimator, which now also works with 'binary' definitions. diff --git a/R/estimate.default.R b/R/estimate.default.R index 37f6910c..5f6b58d7 100644 --- a/R/estimate.default.R +++ b/R/estimate.default.R @@ -42,8 +42,6 @@ estimate <- function(x, ...) UseMethod("estimate") ##' @param back.transform (optional) transform of parameters and confidence ##' intervals ##' @param folds (optional) aggregate influence functions (divide and conquer) -##' @param R Number of simulations (simulated p-values) -##' @param null.sim Mean under the null for simulations ##' @details ##' ##' influence function decomposition of estimator \eqn{\widehat{\theta}} based @@ -153,20 +151,6 @@ estimate <- function(x, ...) UseMethod("estimate") ##' IC(merge(l1,l2,l3,id=FALSE)) # independence ##' ##' -##' ## Monte Carlo approach, simple trend test example -##' -##' m <- categorical(lvm(),~x,K=5) -##' regression(m,additive=TRUE) <- y~x -##' d <- simulate(m,100,seed=1,'y~x'=0.1) -##' l <- lm(y~-1+factor(x),data=d) -##' -##' f <- function(x) coef(lm(x~seq_along(x)))[2] -##' null <- rep(mean(coef(l)),length(coef(l))) -##' ## just need to make sure we simulate under H0: slope=0 -##' estimate(l,f,R=1e2,null.sim=null) -##' -##' estimate(l,f) -##' ##' # ------ influence function calculus ------- ##' a <- estimate(coef = c("a" = 0.5), IC = scale(rnorm(10), scale=FALSE), id = 1:10) ##' b <- estimate(coef = c("b" = 0.8), IC = scale(rnorm(10), scale=FALSE), id = 1:10) @@ -201,9 +185,10 @@ estimate <- function(x, ...) UseMethod("estimate") ##' e == 1 # wald-test, null-hypothesis H0: b=1 ##' e == c(1,2) ##' B %*% e == 1 -##' @aliases estimate estimate.default estimate.estimate merge.estimate +##' @aliases estimate estimate.default merge.estimate ##' @aliases estimate.mlm -##' @seealso estimate.array +##' @seealso estimate.array summary.estimate coef.estimate vcov.estimate +##' transform.estimate labels.estimate IC.estimate ##' @method estimate default ##' @export estimate.default <- function(x=NULL, f=NULL, ..., data, id, @@ -220,14 +205,15 @@ estimate.default <- function(x=NULL, f=NULL, ..., data, id, robust=TRUE, df=NULL, print=NULL, labels, label.width, only.coef=FALSE, back.transform=NULL, - folds=0, - R=0, - null.sim) { + folds=0) { cl <- match.call(expand.dots = TRUE) cal <- match.call() if ("iid" %in% names(cl)) { stop("The 'iid' argument is obsolete. Please use the 'IC' argument") } + if ("R" %in% names(cl) || "null.sim" %in% names(cl)) { + stop("The 'R' and 'null.sim' arguments have been removed. ") + } if (!missing(use)) { p0 <- c( "f", "contrast", "only.coef", @@ -453,27 +439,6 @@ estimate.default <- function(x=NULL, f=NULL, ..., data, id, } } - ## Simulate p-value - if (R>0) { - if (is.null(f)) stop("Supply function 'f'") - if (missing(null.sim)) null.sim <- rep(0, length(pp)) - est <- f(pp) - if (is.list(est)) { - nn <- names(est) - est <- unlist(est) - names(est) <- nn - } - if (missing(labels)) { - labels <- colnames(rbind(est)) - } - res <- simnull(R, f, mu = null.sim, sigma = V, labels = labels) - return(structure(res, class=c("estimate.sim", "sim"), - coef=pp, - vcov=V, - f=f, - estimate=est)) - } - derivative <- NULL if (!is.null(f)) { form <- names(formals(f)) @@ -721,94 +686,6 @@ estimate.default <- function(x=NULL, f=NULL, ..., data, id, return(structure(res, class="estimate")) } -simnull <- function(R, f, mu, sigma, labels=NULL) { - X <- rmvn0(R, mu=mu, sigma=sigma) - est <- f(mu) - res <- apply(X, 1, f) - if (is.list(est)) { - nn <- names(est) - est <- unlist(est) - names(est) <- nn - res <- matrix(unlist(res), byrow=TRUE, ncol=length(est)) - } else { - res <- t(rbind(res)) - } - if (is.null(labels)) { - labels <- colnames(rbind(est)) - if (is.null(labels)) - labels <- paste0("p", seq_along(est)) - } - colnames(res) <- labels - return(res) -} - -##' @export -estimate.estimate.sim <- function(x, f, R=0, labels, ...) { - atr <- attributes(x) - if (R>0) { - if (missing(f)) { - val <- simnull(R, f=atr[["f"]], mu=atr[["coef"]], sigma=atr[["vcov"]]) - res <- rbind(x, val) - for (a in setdiff(names(atr), c("dim", "dimnames"))) - attr(res, a) <- atr[[a]] - } else { - res <- simnull(R, f=f, mu=atr[["coef"]], sigma=atr[["vcov"]]) - for (a in setdiff(names(atr), c("dim", "dimnames", "f"))) - attr(res, a) <- atr[[a]] - attr(f, "f") <- f - est <- unlist(f(atr[["coef"]])) - if (missing(labels)) labels <- colnames(rbind(est)) - attr(res, "estimate") <- est - } - if (!missing(labels)) colnames(res) <- labels - return(res) - } - if (missing(f)) { - if (!missing(labels)) colnames(res) <- labels - return(x) - } - - est <- f(atr[["coef"]]) - res <- apply(x, 1, f) - if (is.list(est)) { - res <- matrix(unlist(res), byrow=TRUE, ncol=length(est)) - } else { - res <- t(rbind(res)) - } - if (missing(labels)) { - labels <- colnames(rbind(est)) - if (is.null(labels)) labels <- paste0("p", seq_along(est)) - } - colnames(res) <- labels - for (a in setdiff(names(atr), c("dim", "dimnames", "f", "estimate"))) - attr(res, a) <- atr[[a]] - attr(f, "f") <- f - attr(res, "estimate") <- unlist(est) - return(res) -} - -##' @export -print.estimate.sim <- function(x, level=.95, ...) { - quantiles <- c((1-level)/2, 1-(1-level)/2) - est <- attr(x, "estimate") - mysummary <- function(x, INDEX, ...) { - x <- as.vector(x) - res <- c(mean(x, na.rm=TRUE), - sd(x, na.rm=TRUE), - quantile(x, quantiles, na.rm=TRUE), - est[INDEX], - mean(abs(x)>abs(est[INDEX]), na.rm=TRUE)) - - names(res) <- c("Mean", "SD", paste0(quantiles*100, "%"), - "Estimate", "P-value") - res - } - env <- new.env() - assign("est", attr(x, "estimate"), env) - environment(mysummary) <- env - print(summary(x, fun=mysummary, ...)) -} - ##' @export print.estimate <- function(x, type=0L, digits=4L, width=25L, std.error=TRUE, p.value=TRUE, diff --git a/inst/misc/estimate_sim.R b/inst/misc/estimate_sim.R new file mode 100644 index 00000000..fd416523 --- /dev/null +++ b/inst/misc/estimate_sim.R @@ -0,0 +1,155 @@ +# ----- Deprecated: removed from lava in version 1.9.1 ----- +# Monte Carlo simulation-based p-values via estimate(). +# This code is preserved for reference only and is NOT loaded by the package. + +# ---- estimate_sim ---------------------------------------------------------- +# Standalone replacement for the removed estimate(..., R=, null.sim=) pathway. +# +# @param x model object (glm, lvmfit, ...) +# @param f transformation of model parameters +# @param R number of simulations +# @param null.sim mean under the null for simulations +# @param level level of confidence limits +# @param vcov (optional) covariance matrix of parameter estimates +# @param coef (optional) parameter coefficients +# @param labels (optional) names of coefficients +# @param robust if TRUE robust standard errors are used +# @param IC if TRUE the influence function decomposition is used +# @param ... additional arguments +estimate_sim <- function(x = NULL, f, R, null.sim, ..., + level = 0.95, vcov, coef, labels, + robust = TRUE, IC = robust) { + if (is.null(f)) stop("Supply function 'f'") + if (!missing(coef)) { + pp <- coef + } else { + pp <- stats::coef(x) + } + if (missing(vcov) || is.null(vcov)) { + if (!is.null(IC) && ((is.logical(IC) && IC) || length(IC) > 0) && robust) { + ic_theta <- lava::IC(x) + V <- var(ic_theta) / nrow(ic_theta) + } else { + V <- stats::vcov(x) + } + } else { + V <- cbind(vcov) + } + if (missing(null.sim)) null.sim <- rep(0, length(pp)) + est <- f(pp) + if (is.list(est)) { + nn <- names(est) + est <- unlist(est) + names(est) <- nn + } + if (missing(labels)) { + labels <- colnames(rbind(est)) + } + res <- simnull(R, f, mu = null.sim, sigma = V, labels = labels) + structure(res, class = c("estimate.sim", "sim"), + coef = pp, + vcov = V, + f = f, + estimate = est) +} + +simnull <- function(R, f, mu, sigma, labels = NULL) { + X <- lava::rmvn0(R, mu = mu, sigma = sigma) + est <- f(mu) + res <- apply(X, 1, f) + if (is.list(est)) { + nn <- names(est) + est <- unlist(est) + names(est) <- nn + res <- matrix(unlist(res), byrow = TRUE, ncol = length(est)) + } else { + res <- t(rbind(res)) + } + if (is.null(labels)) { + labels <- colnames(rbind(est)) + if (is.null(labels)) + labels <- paste0("p", seq_along(est)) + } + colnames(res) <- labels + return(res) +} + +estimate.estimate.sim <- function(x, f, R = 0, labels, ...) { + atr <- attributes(x) + if (R > 0) { + if (missing(f)) { + val <- simnull(R, f = atr[["f"]], mu = atr[["coef"]], sigma = atr[["vcov"]]) + res <- rbind(x, val) + for (a in setdiff(names(atr), c("dim", "dimnames"))) + attr(res, a) <- atr[[a]] + } else { + res <- simnull(R, f = f, mu = atr[["coef"]], sigma = atr[["vcov"]]) + for (a in setdiff(names(atr), c("dim", "dimnames", "f"))) + attr(res, a) <- atr[[a]] + attr(f, "f") <- f + est <- unlist(f(atr[["coef"]])) + if (missing(labels)) labels <- colnames(rbind(est)) + attr(res, "estimate") <- est + } + if (!missing(labels)) colnames(res) <- labels + return(res) + } + if (missing(f)) { + if (!missing(labels)) colnames(res) <- labels + return(x) + } + + est <- f(atr[["coef"]]) + res <- apply(x, 1, f) + if (is.list(est)) { + res <- matrix(unlist(res), byrow = TRUE, ncol = length(est)) + } else { + res <- t(rbind(res)) + } + if (missing(labels)) { + labels <- colnames(rbind(est)) + if (is.null(labels)) labels <- paste0("p", seq_along(est)) + } + colnames(res) <- labels + for (a in setdiff(names(atr), c("dim", "dimnames", "f", "estimate"))) + attr(res, a) <- atr[[a]] + attr(f, "f") <- f + attr(res, "estimate") <- unlist(est) + return(res) +} + +print.estimate.sim <- function(x, level = .95, ...) { + quantiles <- c((1 - level) / 2, 1 - (1 - level) / 2) + est <- attr(x, "estimate") + mysummary <- function(x, INDEX, ...) { + x <- as.vector(x) + res <- c(mean(x, na.rm = TRUE), + sd(x, na.rm = TRUE), + quantile(x, quantiles, na.rm = TRUE), + est[INDEX], + mean(abs(x) > abs(est[INDEX]), na.rm = TRUE)) + + names(res) <- c("Mean", "SD", paste0(quantiles * 100, "%"), + "Estimate", "P-value") + res + } + env <- new.env() + assign("est", attr(x, "estimate"), env) + environment(mysummary) <- env + print(summary(x, fun = mysummary, ...)) +} + +# ---- examples -------------------------------------------------------------- +# Monte Carlo approach, simple trend test example +# +# library(lava) +# m <- categorical(lvm(), ~x, K = 5) +# regression(m, additive = TRUE) <- y ~ x +# d <- simulate(m, 100, seed = 1, 'y~x' = 0.1) +# l <- lm(y ~ -1 + factor(x), data = d) +# +# f <- function(x) coef(lm(x ~ seq_along(x)))[2] +# null <- rep(mean(coef(l)), length(coef(l))) +# ## just need to make sure we simulate under H0: slope = 0 +# source(system.file("misc/estimate_sim.R", package = "lava")) +# estimate_sim(l, f, R = 1e2, null.sim = null) diff --git a/man/estimate.default.Rd b/man/estimate.default.Rd index fac0c805..f3b5bf2b 100644 --- a/man/estimate.default.Rd +++ b/man/estimate.default.Rd @@ -3,7 +3,6 @@ \name{estimate.default} \alias{estimate.default} \alias{estimate} -\alias{estimate.estimate} \alias{merge.estimate} \alias{estimate.mlm} \title{Estimation of functional of parameters} @@ -37,9 +36,7 @@ label.width, only.coef = FALSE, back.transform = NULL, - folds = 0, - R = 0, - null.sim + folds = 0 ) } \arguments{ @@ -108,10 +105,6 @@ returned. Use \code{parameter(estimate(...))} instead.} intervals} \item{folds}{(optional) aggregate influence functions (divide and conquer)} - -\item{R}{Number of simulations (simulated p-values)} - -\item{null.sim}{Mean under the null for simulations} } \description{ Estimation of functional of parameters. Wald tests, robust standard errors, @@ -177,8 +170,8 @@ estimate(lm(y~x,d1)) ## Marginalize f <- function(p,data) - list(p0=lava:::expit(p["(Intercept)"] + p["z"]*data[,"z"]), - p1=lava:::expit(p["(Intercept)"] + p["x"] + p["z"]*data[,"z"])) + list(p0=expit(p["(Intercept)"] + p["z"]*data[,"z"]), + p1=expit(p["(Intercept)"] + p["x"] + p["z"]*data[,"z"])) e <- estimate(g, f, average=TRUE) e estimate(e,diff) @@ -187,7 +180,7 @@ estimate(e,cbind(1,1)) ## Clusters and subset (conditional marginal effects) d$id <- rep(seq(nrow(d)/4),each=4) estimate(g,function(p,data) - list(p0=lava:::expit(p[1] + p["z"]*data[,"z"])), + list(p0=expit(p[1] + p["z"]*data[,"z"])), subset=d$z>0, id=d$id, average=TRUE) ## More examples with clusters: @@ -223,20 +216,6 @@ IC(merge(l1,l2,l3,id=TRUE)) # one-to-one (same clusters) IC(merge(l1,l2,l3,id=FALSE)) # independence -## Monte Carlo approach, simple trend test example - -m <- categorical(lvm(),~x,K=5) -regression(m,additive=TRUE) <- y~x -d <- simulate(m,100,seed=1,'y~x'=0.1) -l <- lm(y~-1+factor(x),data=d) - -f <- function(x) coef(lm(x~seq_along(x)))[2] -null <- rep(mean(coef(l)),length(coef(l))) -## just need to make sure we simulate under H0: slope=0 -estimate(l,f,R=1e2,null.sim=null) - -estimate(l,f) - # ------ influence function calculus ------- a <- estimate(coef = c("a" = 0.5), IC = scale(rnorm(10), scale=FALSE), id = 1:10) b <- estimate(coef = c("b" = 0.8), IC = scale(rnorm(10), scale=FALSE), id = 1:10) @@ -273,5 +252,6 @@ e == c(1,2) B \%*\% e == 1 } \seealso{ -estimate.array +estimate.array summary.estimate coef.estimate vcov.estimate +transform.estimate labels.estimate IC.estimate } From 559ebf399af398976284223fe335d4982fb1dadb Mon Sep 17 00:00:00 2001 From: Benedikt Sommer Date: Fri, 8 May 2026 16:17:35 +0200 Subject: [PATCH 2/6] remove argument --- AGENTS.md | 1 + R/estimate.default.R | 25 ++++--- man/estimate.default.Rd | 11 +-- vignettes/influencefunction.Rmd | 128 ++++++++++++++++---------------- 4 files changed, 84 insertions(+), 81 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 8dbdad32..009a78c8 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -35,6 +35,7 @@ devtools::test(filter = "estimate_default") # substring match on test file name - 2-space indent, LF line endings, UTF-8 (enforced by `.editorconfig`). - R and C/C++ files require a final newline; general files do not. +- Single hashes for comments and roxygen documentation ## CI diff --git a/R/estimate.default.R b/R/estimate.default.R index 37f6910c..d7b4856e 100644 --- a/R/estimate.default.R +++ b/R/estimate.default.R @@ -32,7 +32,6 @@ estimate <- function(x, ...) UseMethod("estimate") ##' @param null (optional) null hypothesis to test ##' @param vcov (optional) covariance matrix of parameter estimates ##' @param coef (optional) parameter coefficient -##' @param robust if TRUE robust standard errors are calculated ##' @param df degrees of freedom (default obtained from 'df.residual') ##' @param print (optional) print function for the resulting estimate object ##' @param labels (optional) names of coefficients @@ -211,13 +210,13 @@ estimate.default <- function(x=NULL, f=NULL, ..., data, id, average=FALSE, subset, score.deriv, level=0.95, - IC=robust, + IC=TRUE, type=c("robust", "df", "mbn"), var.adj, keep, use, regex=FALSE, ignore.case=FALSE, contrast, null, vcov, coef, - robust=TRUE, df=NULL, + df=NULL, print=NULL, labels, label.width, only.coef=FALSE, back.transform=NULL, folds=0, @@ -228,6 +227,13 @@ estimate.default <- function(x=NULL, f=NULL, ..., data, id, if ("iid" %in% names(cl)) { stop("The 'iid' argument is obsolete. Please use the 'IC' argument") } + if ("robust" %in% names(cl)) { + warning( + "The 'robust' argument is deprecated and ignored. ", + "Robust standard errors are now always computed. ", + "Use the 'vcov' argument to compute model-based SEs." + ) + } if (!missing(use)) { p0 <- c( "f", "contrast", "only.coef", @@ -279,7 +285,7 @@ estimate.default <- function(x=NULL, f=NULL, ..., data, id, alpha <- 1 - level alpha.str <- paste(c(alpha/2, 1 -alpha/2)*100, "", sep="%") nn <- NULL - if ((((is.logical(IC) && IC) || length(IC)>0) && robust) && + if (((is.logical(IC) && IC) || length(IC)>0) && (missing(vcov) || is.null(vcov) || (is.logical(vcov) && vcov[1]==FALSE && !is.na(vcov[1])))) { ## If user supplied vcov, then don't estimate IC @@ -387,14 +393,11 @@ estimate.default <- function(x=NULL, f=NULL, ..., data, id, if (!is.null(ic_theta) && (length(idstack)==nrow(ic_theta))) { rownames(ic_theta) <- idstack } - if (!robust) { - if (inherits(x, "lm") && family(x)$family=="gaussian" - && is.null(df)) - df <- x$df.residual - if (missing(vcov) && !is.null(x)) - suppressWarnings(vcov <- stats::vcov(x)) + if (inherits(x, "lm") && family(x)$family == "gaussian" + && is.null(df) && !missing(vcov)) { + df <- x$df.residual } - if (!is.null(ic_theta) && robust && (missing(vcov) || is.null(vcov))) { + if (!is.null(ic_theta) && (missing(vcov) || is.null(vcov))) { V <- var_ic(ic_theta) ## Small-sample corrections for clustered data K <- NROW(ic_theta) diff --git a/man/estimate.default.Rd b/man/estimate.default.Rd index fac0c805..252c9854 100644 --- a/man/estimate.default.Rd +++ b/man/estimate.default.Rd @@ -19,7 +19,7 @@ subset, score.deriv, level = 0.95, - IC = robust, + IC = TRUE, type = c("robust", "df", "mbn"), var.adj, keep, @@ -30,7 +30,6 @@ null, vcov, coef, - robust = TRUE, df = NULL, print = NULL, labels, @@ -91,8 +90,6 @@ arguments} \item{coef}{(optional) parameter coefficient} -\item{robust}{if TRUE robust standard errors are calculated} - \item{df}{degrees of freedom (default obtained from 'df.residual')} \item{print}{(optional) print function for the resulting estimate object} @@ -177,8 +174,8 @@ estimate(lm(y~x,d1)) ## Marginalize f <- function(p,data) - list(p0=lava:::expit(p["(Intercept)"] + p["z"]*data[,"z"]), - p1=lava:::expit(p["(Intercept)"] + p["x"] + p["z"]*data[,"z"])) + list(p0=expit(p["(Intercept)"] + p["z"]*data[,"z"]), + p1=expit(p["(Intercept)"] + p["x"] + p["z"]*data[,"z"])) e <- estimate(g, f, average=TRUE) e estimate(e,diff) @@ -187,7 +184,7 @@ estimate(e,cbind(1,1)) ## Clusters and subset (conditional marginal effects) d$id <- rep(seq(nrow(d)/4),each=4) estimate(g,function(p,data) - list(p0=lava:::expit(p[1] + p["z"]*data[,"z"])), + list(p0=expit(p[1] + p["z"]*data[,"z"])), subset=d$z>0, id=d$id, average=TRUE) ## More examples with clusters: diff --git a/vignettes/influencefunction.Rmd b/vignettes/influencefunction.Rmd index c11b0b70..75dfd81e 100644 --- a/vignettes/influencefunction.Rmd +++ b/vignettes/influencefunction.Rmd @@ -1,10 +1,10 @@ --- title: The Art of Influence -subtitle: A Practical Guide to Working with Influence Functions +subtitle: A Practical Guide to Working with Influence Functions author: Klaus Kähler Holst Rdate: "`r Sys.Date()`" output: - # html_document: + # html_document: rmarkdown::html_vignette: # toc_float: true fig_width: 6 @@ -34,7 +34,7 @@ has_geepack <- lava:::versioncheck('geepack', 1) ``` \newcommand{\arctanh}{\operatorname{arctanh}} -\newcommand{\indep}{\!\perp\!\!\!\!\perp\!} +\newcommand{\indep}{\!\perp\!\!\!\!\perp\!} \newcommand{\pr}{\mathbb{P}} \newcommand{\E}{\mathbb{E}} \newcommand{\var}{\mathbb{V}\!\text{ar}} @@ -54,7 +54,7 @@ Estimators that have parametric convergence rates can often be fully characterized by their *influence function* (IF), also referred to as an influence curve or canonical gradient [@bickel_effic_adapt_estim_semip_model; @vaart_1998_asymp]. The IF allows for the direct estimation of properties of the estimator, including its asymptotic variance. Moreover, estimates of the IF enable the simple combination and transformation of -estimators into new ones. +estimators into new ones. This vignette describes how to estimate and manipulate IFs using the R-package `lava` [@holst_budtzjorgensen_2013]. @@ -62,10 +62,10 @@ This vignette describes how to estimate and manipulate IFs using the R-package Formally, let $Z_1,\ldots,Z_n$ be iid $k$-dimensional stochastic variables, $Z_i=(Y_{i},A_{i},W_{i})\sim \Pz$, and $\widehat{\theta}$ a consistent estimator for the parameter $\theta\in\mathbb{R}^p$. When $\widehat{\theta}$ is a *regular and asymptotic linear* (RAL) estimator, it has -a unique iid decomposition +a unique iid decomposition \begin{align*} \sqrt{n}(\widehat{\theta}-\theta) = -\frac{1}{\sqrt{n}}\sum_{i=1}^n \IC(Z_i; \Pz) + \op(1), -\end{align*} +\frac{1}{\sqrt{n}}\sum_{i=1}^n \IC(Z_i; \Pz) + \op(1), +\end{align*} where the function $\IC$ is the unique *Influence Function* s.t. $\mathbb{E}\{\IC(Z_{i}; \Pz)\}=0$ and $\var\{\IC(Z_{i}; \Pz)^{2}\}<\infty$ [@tsiatis2006semiparametric; @@ -91,14 +91,14 @@ models as illustrated in the [next example sections](#example-generalized-linear also be derived for a smooth target parameter $\Psi: \mathcal{P}\to\mathbb{R}$ where $\mathcal{P}$ is a family of probability distributions forming the statistical model, which often can be left completely -non-parametric. +non-parametric. Formally, the parameter must be *pathwise differentiable* see [@vaart_1998_asymp] in the sense that there exists linear bounded function $\dot\Psi\colon L_{2}(P_{0})\to\mathbb{R}$ such that $[\Psi(P_{t}) - \Psi(P_{0}))]t^{-1} \to \dot\Psi(P_{0})(g)$ as $t\to 0$ for any parametric submodel $P_t$ with score model $g(z)= \partial/(\partial t) \log (p_t)(z)|_{t=0}$. Riesz's -representation theorem then tells us that the directional derivative has a +representation theorem then tells us that the directional derivative has a unique representer, $\phi_{P_{0}}$ lying in the closure of the submodel score space (the *tangent space*), s.t. \begin{align*} @@ -114,7 +114,7 @@ As an example we might be interested in the target parameter $\Psi(P) = \E_P(Z)$ which can be shown to have the unique (and thereby efficient) influence function $Z\mapsto Z-\E_P(Z)$ under the non-parametric model. Another target parameter could be $\Psi_{a}(P) = \E_{P}[\E_{P}(Y\mid A=a, W)]$ which is often -a key interest in causal inference and which has the IF +a key interest in causal inference and which has the IF \begin{align*} \IC(Y,A,W; P) = \frac{\one(A=a)}{\pr(A=a\mid W)}(Y-\E_{P}[Y\mid A=a,W]) + \E_{P}[Y\mid A=a,W] - \Psi_{a}(P) @@ -146,14 +146,14 @@ asymptotic variance estimate via the `vcov` argument, without specifying the IC matrix. ```{r estimate.syntax, eval=FALSE} -estimate(x=, ...) +estimate(x=, ...) estimate(coef=, IC=, ...) estimate(coef=, vcov=, ...) ``` A typical call could look like ```{r estimate.examples, eval=FALSE} -merge(subset(estimate(x), 1), estimate(coef=p, IC=ic, id=id)) |> +merge(subset(estimate(x), 1), estimate(coef=p, IC=ic, id=id)) |> transform(function(x) c(exp(x), exp(x[1]))) |> # parameter transformation labels(c("a", "b")) # rename parameters ``` @@ -169,7 +169,7 @@ We refer to section # Examples - + To illustrate the methods we consider data arising from the model $Y_{ij} \sim Bernoulli\{\operatorname{expit}(X_{ij} + A_{i} + W_{i})\}, A_{i} \sim Bernoulli\{\operatorname{expit}(W_{i})\}$ @@ -208,7 +208,7 @@ Print(dl) Here we first consider the problem of estimating the IF of the mean. For a general transformation $f: \mathbb{R}^k\to\mathbb{R}^p$ we have that $$ -\sqrt{n}\{\mathbb{P}_{n}f(X) - \E[f(X)]\} = \frac{1}{\sqrt{n}}\sum_{i=1}^{n} +\sqrt{n}\{\mathbb{P}_{n}f(X) - \E[f(X)]\} = \frac{1}{\sqrt{n}}\sum_{i=1}^{n} f(X_{i}) - \E[f(X)] $$ - + ## Example: random effects model / structural equation model -General structural equation models (SEMs) can be estimated with `lava::lvm`. +General structural equation models (SEMs) can be estimated with `lava::lvm`. Here we fit a random effects probit model $$ \pr(Y_{ij} = 1 \mid U_{i}, W_{ij})=\Phi(\mu_{j} + \beta_{j} W_{ij} + U_{i}), \quad U_{i}\sim\mathcal{N}(0,\sigma_{u}^{2}),\quad j=1,2 @@ -384,16 +386,16 @@ estimate(semfit) Let $\beta$ denote the $\tau$th quantile of $X$, with IF \begin{align*} -\IC(x; \Pz) = \tau - \one(x\leq \beta)f_{0}(\beta)^{-1} +\IC(x; \Pz) = \tau - \one(x\leq \beta)f_{0}(\beta)^{-1} \end{align*} where $f_{0}$ is the density function of $X$. To calculate the variance estimate, an estimate of the density is thus needed which can be -obtained by a kernel estimate. Alternatively, +obtained by a kernel estimate. Alternatively, the resampling method of [@zenglin2008] can be applied. Here we use a kernel smoother (additional arguments to the `estimate` function -are parsed on to `stats::density.default`) to estimate the quantiles and IF for +are parsed on to `stats::density.default`) to estimate the quantiles and IF for the 25%, 50%, and 75% quantiles of $W$ and $X_1$ ```{r quantiles} eq <- estimate(dw[, c("w", "x1")], type = "quantile", probs = c(0.25, 0.5, 0.75)) @@ -406,7 +408,7 @@ IC(eq) |> head() A key benefit of working with the IFs of estimators is that this allows for transforming or combining different estimates while easily deriving -the resulting IF and thereby asymptotic distribution of the new estimator. +the resulting IF and thereby asymptotic distribution of the new estimator. Let $\widehat{\theta}_{1}, \ldots, \widehat{\theta}_{M}$ be $M$ different estimators with decompositions @@ -440,11 +442,11 @@ e <- merge(g1, g2) summary(e) ``` As we have access to the joint asymptotic distribution we can for example test -for whether the odds-ratio is the same for the two responses: +for whether the odds-ratio is the same for the two responses: ```{r hypo1} estimate(e, cbind(0,1,0,-1), null=0) -``` +``` More details an be found in the Section on [hypothesis testing](#linear-contrasts-and-hypothesis-testing). ## Imbalanced data @@ -483,20 +485,20 @@ $IC_{1}(\cdot; \Pz)$ and $IC_{2}(\cdot;\ P)$. It then follows that \sqrt{N}\left\{ \begin{pmatrix} \widehat{\theta}_1 \\ -\widehat{\theta}_2 +\widehat{\theta}_2 \end{pmatrix} -- +- \begin{pmatrix} \vphantom{\widehat{\theta}_1}\theta_1 \\ -\vphantom{\widehat{\theta}_1}\theta_2 +\vphantom{\widehat{\theta}_1}\theta_2 \end{pmatrix} \right\} -= -\frac{1}{\sqrt{N}}\sum_{i=1}^N += +\frac{1}{\sqrt{N}}\sum_{i=1}^N \begin{pmatrix} IC_1(Z_{1i}; \Pz)\frac{R_{1i}N}{R_{1\bullet}} \\ IC_2(Z_{2i}; \Pz)\frac{R_{2i}N}{R_{2\bullet}} -\end{pmatrix} +\end{pmatrix} + \op(1) \end{align*} @@ -508,7 +510,7 @@ data) with the `merge` function ```{r glmmargmis} g2 <- glm(y2 ~ 1, family = binomial, data = dw) summary(g2) -dwc <- na.omit(dw) +dwc <- na.omit(dw) g3 <- glm(y3 ~ 1, family = binomial, data = dwc) summary(g3) @@ -528,7 +530,7 @@ merge(e2, e3, id = list(dw$id, dwc$id)) ``` In the above example the `id` argument defines the identifier that makes it -possible to link the rows in the different IFs that should be glued together. +possible to link the rows in the different IFs that should be glued together. If omitted then the `id` will automatically be extracted from the model-specific `IC` method (deriving it from the original data.frame used for estimating the model). This automatically works with all models and `IC` methods described in this document. @@ -550,7 +552,7 @@ merge(g1, g2, id = NULL) |> vcov() ## Renaming and subsetting parameters -To only keep a subset of the parameters the `keep` argument can be used. +To only keep a subset of the parameters the `keep` argument can be used. ```{r mergekeep} merge(g1, g2, keep = c("(Intercept)", "(Intercept).1")) ``` @@ -558,14 +560,14 @@ The argument can be given either as character vector or a vector of indices: ```{r merge2} merge(g1, g2, keep=c(1, 3)) ``` -or as a vector of perl-style regular expressions +or as a vector of perl-style regular expressions ```{r merge3} merge(g1, g2, keep = "cept", regex = TRUE) merge(g1, g2, keep = c("\\)$", "^a$"), regex = TRUE, ignore.case = TRUE) ``` When merging estimates unique parameter names are created. It is also possible -to rename the parameters with the `labels` argument +to rename the parameters with the `labels` argument ```{r merge4} merge(g1, g2, labels = c("a", "b", "c")) |> estimate(keep = c("a", "c")) merge(g1, g2, @@ -576,7 +578,7 @@ estimate(g1, labels=c("a", "b")) ``` Finally, the `subset` argument can be used to subset the parameters and IFs -before the actual merging is being done +before the actual merging is being done ```{r merge5} merge(g1, g2, subset="(Intercept)") ``` @@ -593,9 +595,9 @@ $$ \frac{1}{\sqrt{N}} \sum_{i=1}^{n} \sum_{k=1}^{N_{i}} IC(Z_{ik}; \Pz) + \op(1). $$ -It then follows that +It then follows that $$ -\sqrt{n}(\widehat{\theta}-\theta) = +\sqrt{n}(\widehat{\theta}-\theta) = \frac{1}{\sqrt{n}} \sum_{i=1}^{n} \widetilde{\IC}(Z_{i}; \Pz) + \op(1) $$ @@ -612,21 +614,21 @@ standard errors from the above iid decomposition ```{r cluster2} estimate(g0, id=dl$id) ``` -We can confirm that this situation is equivalent to the variance estimates we +We can confirm that this situation is equivalent to the variance estimates we obtain from a GEE marginal model with working independence correlation structure [@r_geepack] ```{r geepack, eval=has_geepack } gee0 <- geepack::geeglm(y ~ a + w + x, data = dl, id = dl$id, family=binomial) summary(gee0) ``` -## Computational aspects +## Computational aspects Working with large and potentially multiple different IFs can be memory-intensive. A remedy is to use the idea of aggregating the IFs by introducing a random coarser grouping variable. Following the same arguments as in the previous section, the aggregated IF will still be iid and allows us to estimate the asymptotic variance. Obviously, the same grouping must be used across estimates -when combining IFs. +when combining IFs. ```{r aggregate} set.seed(1) y <- cbind(rnorm(1e5)) @@ -635,7 +637,7 @@ id <- foldr(nrow(y), N, list=FALSE) Print(cbind(table(id))) ## Aggregated IF -e <- estimate(cbind(y), id = id) +e <- estimate(cbind(y), id = id) object.size(e) e ``` @@ -645,21 +647,21 @@ e Let $\phi\colon \mathbb{R}^p\to\mathbb{R}^m$ be differentiable at $\theta$ and assume that $\widehat{\theta}_n$ is RAL estimator with IF given by $\IC(\cdot; \Pz)$ such that -\begin{align*} +\begin{align*} \sqrt{n}(\widehat{\theta}_n - \theta) = -\frac{1}{\sqrt{n}}\sum_{i=1}^n \IC(Z_i; \Pz) + \op(1), -\end{align*} +\frac{1}{\sqrt{n}}\sum_{i=1}^n \IC(Z_i; \Pz) + \op(1), +\end{align*} then by the delta method [@vaart_1998_asymp Theorem 3.1] -\begin{align*} +\begin{align*} \sqrt{n}\{\phi(\widehat{\theta}_n) - \phi(\theta)\} = -\frac{1}{\sqrt{n}}\sum_{i=1}^n \nabla\phi(\theta)\IC(Z_i; \Pz) + \op(1), -\end{align*} +\frac{1}{\sqrt{n}}\sum_{i=1}^n \nabla\phi(\theta)\IC(Z_i; \Pz) + \op(1), +\end{align*} where $\phi\colon \theta\mapsto (\phi_{1}(\theta),\ldots,\phi_{m}(\theta))^\top$ and $\nabla$ is the partial derivative operator \begin{align*} -\nabla\phi(\theta) = +\nabla\phi(\theta) = \begin{pmatrix} \tfrac{\partial}{\partial\theta_1}\phi_1(\theta) & \cdots & \tfrac{\partial}{\partial\theta_p}\phi_1(\theta) \\ @@ -680,9 +682,9 @@ estimate(g1, sum) estimate(g1, function(p) list(a = sum(p))) # named list ## Multiple parameters estimate(g1, function(x) c(x, x[1] + exp(x[2]), inv = 1 / x[2])) -estimate(g1, exp) +estimate(g1, exp) ``` -The gradient can be provided as the attribute `grad` and otherwise numerical differentiation is applied. +The gradient can be provided as the attribute `grad` and otherwise numerical differentiation is applied. An often more convenient syntax is to directly transform the `estimate` objects @@ -794,7 +796,7 @@ est <- lm(cbind(x1, x2, x1 * x2) ~ 1, data = dw) |> estimate(labels = c("E1", "E2", "E12")) est -est["E12"] - est["E2"]*est["E1"] +est["E12"] - est["E2"]*est["E1"] # transform(e1, function(x) c(x, cov=with(as.list(x), E12 - E2* E1))) # Same result ``` The variance estimates can be estimated in the same way and combined to estimate the correlation @@ -803,7 +805,7 @@ v12 <- with(dw, Cov(x1, x2, id = id)) v1 <- with(dw, Cov(x1, x1, id = id)) v2 <- with(dw, Cov(x2, x2, id = id)) -rho <- c(rho = v12 / sqrt(v1 * v2)) +rho <- c(rho = v12 / sqrt(v1 * v2)) rho ``` @@ -856,13 +858,13 @@ The $\bm{b}_0$ vector (default assumed to be zero) can be specified via the `nul estimate(gg, B, null=1) ``` -For testing multiple hypotheses we use that +For testing multiple hypotheses we use that $(\bm{B}\widehat{\theta}-\bm{b}_0)^{\top} (\bm{B}\widehat{\Sigma}\bm{B}^{\top})^{-1} (\bm{B}\widehat{\theta}-\bm{b}_0) \sim \chi^2_{\operatorname{rank}(B)}$ under the null hypothesis where $\widehat{\Sigma}$ is the estimated variance of $\theta$ (i.e., -`vcov(gg)`) +`vcov(gg)`) ```{r estcontrast2} B <- rbind(cbind(0,1, 0,-1, 0,0), cbind(0,1, 0,0, 0,-1)) @@ -917,7 +919,7 @@ alpha_zmax(gg0) While this always yields a more powerful test compared to Bonferroni adjustments, a more powerful closed-testing procedure [@marcus1976], can be generally -obtained by considering all intersection hypotheses. +obtained by considering all intersection hypotheses. ![*Figure: closed testing via Wald tests of all intersections hypotheses.*](../man/figures/closedtesting.svg){width=60%} From b971a6584a3662c13643e60661d4d77a9ef40997 Mon Sep 17 00:00:00 2001 From: Benedikt Sommer Date: Fri, 8 May 2026 21:13:02 +0200 Subject: [PATCH 3/6] roxygen --- R/estimate.default.R | 7 +++++-- man/estimate.default.Rd | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/R/estimate.default.R b/R/estimate.default.R index d7b4856e..7d5e0bde 100644 --- a/R/estimate.default.R +++ b/R/estimate.default.R @@ -30,7 +30,9 @@ estimate <- function(x, ...) UseMethod("estimate") ##' @param ignore.case Ignore case-sensitiveness in regular expression ##' @param contrast (optional) Contrast matrix for final Wald test ##' @param null (optional) null hypothesis to test -##' @param vcov (optional) covariance matrix of parameter estimates +##' @param vcov (optional) covariance matrix of parameter estimates or logical. +##' If TRUE, then [stats::vcov] is used to obtain the covariance matrix from the +##' provided model object `x` ##' @param coef (optional) parameter coefficient ##' @param df degrees of freedom (default obtained from 'df.residual') ##' @param print (optional) print function for the resulting estimate object @@ -81,7 +83,8 @@ estimate <- function(x, ...) UseMethod("estimate") ##' estimate(g, 2, 3) ##' ##' ## Usual (non-robust) confidence intervals -##' estimate(g, robust=FALSE) +##' estimate(g, vcov=TRUE) +##' estimate(g, vcov=vcov(g)) ##' ##' ## Transformations ##' estimate(g, function(p) p[1]+p[2]) diff --git a/man/estimate.default.Rd b/man/estimate.default.Rd index 252c9854..5006f5bd 100644 --- a/man/estimate.default.Rd +++ b/man/estimate.default.Rd @@ -86,7 +86,9 @@ arguments} \item{null}{(optional) null hypothesis to test} -\item{vcov}{(optional) covariance matrix of parameter estimates} +\item{vcov}{(optional) covariance matrix of parameter estimates or logical. +If TRUE, then \link[stats:vcov]{stats::vcov} is used to obtain the covariance matrix from the +provided model object \code{x}} \item{coef}{(optional) parameter coefficient} @@ -149,7 +151,8 @@ estimate(g, "1", "2"-"3", null = c(0,1)) estimate(g, 2, 3) ## Usual (non-robust) confidence intervals -estimate(g, robust=FALSE) +estimate(g, vcov=TRUE) +estimate(g, vcov=vcov(g)) ## Transformations estimate(g, function(p) p[1]+p[2]) From 6419732ca70e3804084babeecebfdc79514a6d5f Mon Sep 17 00:00:00 2001 From: Benedikt Sommer Date: Fri, 8 May 2026 21:34:40 +0200 Subject: [PATCH 4/6] deprecation tests --- tests/testthat/test-estimate_default.R | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/testthat/test-estimate_default.R b/tests/testthat/test-estimate_default.R index e4f74ecf..f82b4450 100644 --- a/tests/testthat/test-estimate_default.R +++ b/tests/testthat/test-estimate_default.R @@ -437,6 +437,30 @@ test_that("estimate.default keep with regex=TRUE", { expect_true(nrow(e1$coefmat) == 1) }) +test_that("robust argument backwards compatibility", { + d <- data.frame(y = rnorm(50), x = rnorm(50)) + g <- lm(y ~ x, data = d) + + # Both robust=TRUE and robust=FALSE emit a deprecation warning + e0 <- expect_warning(estimate(g, robust = FALSE), "deprecated and ignored") + e1 <- expect_warning(estimate(g, robust = TRUE), "deprecated and ignored") + expect_equal(e0$coefmat, e1$coefmat) + + e <- estimate(g) + # The robust argument is ignored: results are identical to the default + # (sandwich SEs) + expect_equal(e$coefmat, e0$coefmat) + + expect_equal(e$coefmat, e0$coefmat) + + # model-based SE can be obtained either via logical variable or supplying + # covariance matrix + e0m <- estimate(g, vcov = TRUE) + expect_false(all(e$coefmat == e0m$coefmat)) + e1m <- estimate(g, vcov = vcov(g)) + expect_equal(e0m$coefmat, e1m$coefmat) +}) + test_that("only.coef argument is deprecated", { expect_warning( result <- estimate(a3d, only.coef = TRUE), From 9b53102c6cac36333d1fa590f4e3e16f43a74537 Mon Sep 17 00:00:00 2001 From: Benedikt Sommer Date: Mon, 11 May 2026 11:36:52 +0200 Subject: [PATCH 5/6] minor comments --- R/estimate.default.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/estimate.default.R b/R/estimate.default.R index 2578036b..da6d4ff5 100644 --- a/R/estimate.default.R +++ b/R/estimate.default.R @@ -384,6 +384,8 @@ estimate.default <- function(x=NULL, f=NULL, ..., data, id, } if (inherits(x, "lm") && family(x)$family == "gaussian" && is.null(df) && !missing(vcov)) { + # defaults to t-distribution when calculating p-values with model-based + # SEs df <- x$df.residual } if (!is.null(ic_theta) && (missing(vcov) || is.null(vcov))) { @@ -576,6 +578,8 @@ estimate.default <- function(x=NULL, f=NULL, ..., data, id, if (!missing(null) && missing(contrast)) beta0 <- beta0-null if (!is.null(df)) { + # the idea should be to assign df as an attribute to the returned + # object, and re-use it inside summary.estimate za <- qt(1-alpha/2, df=df) pval <- 2*pt(abs(res[, 1]/res[, 2]), df=df, lower.tail=FALSE) } else { From 0ed875455c2ebb52e998ff6deb6848bc4c9aea44 Mon Sep 17 00:00:00 2001 From: Benedikt Sommer Date: Tue, 12 May 2026 13:54:49 +0200 Subject: [PATCH 6/6] rough implementation --- NEWS.md | 5 + R/diagtest.R | 2 +- R/estimate.default.R | 97 ++++++++++++++++--- R/estimate_calculus.R | 2 +- man/estimate.default.Rd | 17 ++-- man/summary.estimate.Rd | 33 +++++++ tests/testthat/test-estimate-deprecation.R | 65 +++++++++++++ .../test-estimate-summary-equivalence.R | 91 +++++++++++++++++ tests/testthat/test-estimate_default.R | 52 +++++----- tests/testthat/test-summary-requires-IC.R | 41 ++++++++ vignettes/influencefunction.Rmd | 12 +-- 11 files changed, 363 insertions(+), 54 deletions(-) create mode 100644 man/summary.estimate.Rd create mode 100644 tests/testthat/test-estimate-deprecation.R create mode 100644 tests/testthat/test-estimate-summary-equivalence.R create mode 100644 tests/testthat/test-summary-requires-IC.R diff --git a/NEWS.md b/NEWS.md index 2ac125f9..412bdfb6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # lava 1.9.1 Development version + - `estimate.default`: the `null`, `contrast`, `type`, and `var.adj` + arguments are soft-deprecated. Use + `summary(estimate(...), null=, contrast=, type=, var.adj=)` instead. + The default-path Wald p-value (H0: beta = 0) continues to be reported by + `estimate()`. - `estimate.default`: removed `score.deriv` and `folds` arguments. - `estimate.default`: removed `R` and `null.sim` arguments - Safe evaluation of rank in `wald_test` diff --git a/R/diagtest.R b/R/diagtest.R index a4b9e4d1..5c9dde14 100644 --- a/R/diagtest.R +++ b/R/diagtest.R @@ -156,7 +156,7 @@ diagtest <- function(table,positive=2,exact=FALSE,p0=NA,confint=c("logit","arcsi } } } - res <- estimate(M,calc_diag,print=prfun,null=c(rep(p0,7),0),back.transform=btransform,...) + res <- suppressWarnings(estimate(M,calc_diag,print=prfun,null=c(rep(p0,7),0),back.transform=btransform,...)) } CI <- confint[1] if (exact) CI <- "exact" diff --git a/R/estimate.default.R b/R/estimate.default.R index a8901298..98993c8b 100644 --- a/R/estimate.default.R +++ b/R/estimate.default.R @@ -20,15 +20,19 @@ estimate <- function(x, ...) UseMethod("estimate") ##' @param level level of confidence limits ##' @param IC if TRUE (default) the influence function decompositions are also ##' returned (extract with \code{IC} method) -##' @param type type of small-sample correction +##' @param type type of small-sample correction (deprecated; see +##' [summary.estimate]) ##' @param var.adj variance adjustment parameter for small-sample correction +##' (deprecated; see [summary.estimate]) ##' @param keep (optional) index of parameters to keep from final result ##' @param use (optional) index of parameters to use in calculations ##' @param regex If TRUE use regular expression (perl compatible) for keep, use ##' arguments ##' @param ignore.case Ignore case-sensitiveness in regular expression ##' @param contrast (optional) Contrast matrix for final Wald test -##' @param null (optional) null hypothesis to test +##' (deprecated; see [summary.estimate]) +##' @param null (optional) null hypothesis to test (deprecated; see +##' [summary.estimate]) ##' @param vcov (optional) covariance matrix of parameter estimates or logical. ##' If TRUE, then [stats::vcov] is used to obtain the covariance matrix from the ##' provided model object `x` @@ -66,16 +70,15 @@ estimate <- function(x, ...) UseMethod("estimate") ##' estimate(g) ##' ##' ## Testing contrasts -##' estimate(g, null=0) ##' estimate(g, rbind(c(1,1,0), c(1,0,2))) -##' estimate(g, rbind(c(1,1,0), c(1,0,2)), null=c(1,2)) +##' summary(estimate(g, rbind(c(1,1,0), c(1,0,2))), null=c(1,2)) ##' estimate(g, 2:3) ## same as cbind(0,1,-1) ##' estimate(g, as.list(2:3)) ## same as rbind(c(0,1,0),c(0,0,1)) ##' ## Alternative syntax ##' estimate(g, "z", "z"-"x", 2*"z"-3*"x") ##' estimate(g, "?") ## Wildcards ##' estimate(g, "*Int*", "z") -##' estimate(g, "1", "2"-"3", null = c(0,1)) +##' summary(estimate(g, "1", "2"-"3"), null = c(0,1)) ##' estimate(g, 2, 3) ##' ##' ## Usual (non-robust) confidence intervals @@ -219,6 +222,16 @@ estimate.default <- function(x=NULL, f=NULL, ..., data, id, "Use the 'vcov' argument to compute model-based SEs." ) } + if (!missing(null) || !missing(contrast) || + !missing(type) || !missing(var.adj)) { + .Deprecated( + msg = paste0( + "The 'null', 'contrast', 'type', and 'var.adj' arguments of ", + "estimate.default() are deprecated. Use ", + "summary(estimate(...), null=, contrast=, type=, var.adj=) instead." + ) + ) + } if (!missing(use)) { p0 <- c( "f", "contrast", "only.coef", @@ -600,7 +613,7 @@ estimate.default <- function(x=NULL, f=NULL, ..., data, id, names(coefs) <- rownames(res) } res <- structure(list(coef=coefs, coefmat=res, vcov=V, - IC=NULL, print=print, id=idstack), + IC=NULL, print=print, id=idstack, df=df), class="estimate") if (IC) ## && is.null(back.transform)) res$IC <- ic_theta @@ -640,7 +653,7 @@ estimate.default <- function(x=NULL, f=NULL, ..., data, id, rownames(res$coefmat) <- names(res$coef) } } - + if (!is.null(back.transform)) { res$coefmat[, c(1, 3, 4)] <- do.call(back.transform, list(res$coefmat[, c(1, 3, 4)])) @@ -825,20 +838,78 @@ with_unique_warnings <- function(expr) { }) } +##' Summary of estimate objects +##' +##' Computes hypothesis tests, contrasts, and small-sample corrections for +##' an [estimate] object. The arguments `null`, `contrast`, `type`, and +##' `var.adj` were previously available on [estimate.default()] and have +##' been moved here. +##' +##' @param object an `estimate` object. +##' @param contrast (optional) contrast matrix for the final Wald test. +##' @param null (optional) null hypothesis to test. +##' @param type type of small-sample correction. Requires the estimate +##' to have been computed with `IC=TRUE` (the default). +##' @param var.adj variance adjustment parameter for small-sample +##' correction. Requires the estimate to have been computed with +##' `IC=TRUE` (the default). +##' @param ... additional arguments passed to [estimate()]. +##' @seealso [estimate.default()] ##' @export summary.estimate <- function(object, contrast, + null, + type, + var.adj, ...) { with_unique_warnings({ p <- coef(object) - if (missing(contrast)) contrast <- diag(1,nrow=length(p))#as.list(seq_along(p)) - test <- estimate(coef=p, - vcov=vcov(object), - f = FALSE, - contrast = contrast, - ...) + df <- object$df + correction <- !missing(type) || !missing(var.adj) + user_contrast <- !missing(contrast) + user_null <- !missing(null) + if (missing(contrast)) contrast <- diag(1, nrow=length(p)) + args <- list( + coef = p, + contrast = contrast, + df = df + ) + ## When the user supplies any of null/contrast/type/var.adj we want + ## the recall to run the full Wald-test override (contrast.transform + ## must be TRUE). In the no-op case we preserve the historical + ## behavior of passing f=FALSE so the H0:beta=0 p-values in coefmat + ## are kept verbatim. + if (!user_contrast && !user_null && !correction) { + args$f <- FALSE + } + if (user_null) args$null <- null + if (correction) { + if (is.null(object$IC)) { + stop( + "Small-sample corrections in summary() require an estimate ", + "computed with IC=TRUE." + ) + } + args$IC <- object$IC + args$id <- object$id + if (!missing(type)) args$type <- type + if (!missing(var.adj)) args$var.adj <- var.adj + } else { + args$vcov <- vcov(object) + } + args <- c(args, list(...)) + test <- suppressWarnings(do.call(estimate, args)) class(test) <- "NULL" test$compare <- test$compare + ## Preserve dimnames on vcov: the recall path through type/var.adj + ## corrections builds V from the influence function and may drop + ## dimnames. Restore them from the original coef names so the + ## summary output is consistent with the deprecated estimate() path. + if (!is.null(test$vcov) && is.null(dimnames(test$vcov)) && + !is.null(names(test$coef)) && + nrow(test$vcov) == length(test$coef)) { + dimnames(test$vcov) <- list(names(test$coef), names(test$coef)) + } object <- test[c("coef", "coefmat", "vcov", "call", "ncluster", "model.index", "compare")] class(object) <- "summary.estimate" diff --git a/R/estimate_calculus.R b/R/estimate_calculus.R index 47416459..7c5d7963 100644 --- a/R/estimate_calculus.R +++ b/R/estimate_calculus.R @@ -584,5 +584,5 @@ operator_grad <- function(x, y, x_const, y_const, dx, dy) { if (!(is.numeric(e1) || is.numeric(e2))) stop("numeric comperator needed") null <- if (is.numeric(e1)) e1 else e2 e <- if (is.numeric(e1)) e2 else e1 - estimate(e, null=null) + suppressWarnings(estimate(e, null=null)) } diff --git a/man/estimate.default.Rd b/man/estimate.default.Rd index 91c019c1..9a62a268 100644 --- a/man/estimate.default.Rd +++ b/man/estimate.default.Rd @@ -62,9 +62,11 @@ expression or variable name)} \item{IC}{if TRUE (default) the influence function decompositions are also returned (extract with \code{IC} method)} -\item{type}{type of small-sample correction} +\item{type}{type of small-sample correction (deprecated; see +\link{summary.estimate})} -\item{var.adj}{variance adjustment parameter for small-sample correction} +\item{var.adj}{variance adjustment parameter for small-sample correction +(deprecated; see \link{summary.estimate})} \item{keep}{(optional) index of parameters to keep from final result} @@ -75,9 +77,11 @@ arguments} \item{ignore.case}{Ignore case-sensitiveness in regular expression} -\item{contrast}{(optional) Contrast matrix for final Wald test} +\item{contrast}{(optional) Contrast matrix for final Wald test +(deprecated; see \link{summary.estimate})} -\item{null}{(optional) null hypothesis to test} +\item{null}{(optional) null hypothesis to test (deprecated; see +\link{summary.estimate})} \item{vcov}{(optional) covariance matrix of parameter estimates or logical. If TRUE, then \link[stats:vcov]{stats::vcov} is used to obtain the covariance matrix from the @@ -125,16 +129,15 @@ estimate(g, g0) estimate(g) ## Testing contrasts -estimate(g, null=0) estimate(g, rbind(c(1,1,0), c(1,0,2))) -estimate(g, rbind(c(1,1,0), c(1,0,2)), null=c(1,2)) +summary(estimate(g, rbind(c(1,1,0), c(1,0,2))), null=c(1,2)) estimate(g, 2:3) ## same as cbind(0,1,-1) estimate(g, as.list(2:3)) ## same as rbind(c(0,1,0),c(0,0,1)) ## Alternative syntax estimate(g, "z", "z"-"x", 2*"z"-3*"x") estimate(g, "?") ## Wildcards estimate(g, "*Int*", "z") -estimate(g, "1", "2"-"3", null = c(0,1)) +summary(estimate(g, "1", "2"-"3"), null = c(0,1)) estimate(g, 2, 3) ## Usual (non-robust) confidence intervals diff --git a/man/summary.estimate.Rd b/man/summary.estimate.Rd new file mode 100644 index 00000000..b6f9710c --- /dev/null +++ b/man/summary.estimate.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/estimate.default.R +\name{summary.estimate} +\alias{summary.estimate} +\title{Summary of estimate objects} +\usage{ +\method{summary}{estimate}(object, contrast, null, type, var.adj, ...) +} +\arguments{ +\item{object}{an \code{estimate} object.} + +\item{contrast}{(optional) contrast matrix for the final Wald test.} + +\item{null}{(optional) null hypothesis to test.} + +\item{type}{type of small-sample correction. Requires the estimate +to have been computed with \code{IC=TRUE} (the default).} + +\item{var.adj}{variance adjustment parameter for small-sample +correction. Requires the estimate to have been computed with +\code{IC=TRUE} (the default).} + +\item{...}{additional arguments passed to \code{\link[=estimate]{estimate()}}.} +} +\description{ +Computes hypothesis tests, contrasts, and small-sample corrections for +an \link{estimate} object. The arguments \code{null}, \code{contrast}, \code{type}, and +\code{var.adj} were previously available on \code{\link[=estimate.default]{estimate.default()}} and have +been moved here. +} +\seealso{ +\code{\link[=estimate.default]{estimate.default()}} +} diff --git a/tests/testthat/test-estimate-deprecation.R b/tests/testthat/test-estimate-deprecation.R new file mode 100644 index 00000000..721fcad3 --- /dev/null +++ b/tests/testthat/test-estimate-deprecation.R @@ -0,0 +1,65 @@ +## Tests for soft deprecation of null/contrast/type/var.adj in estimate.default + +context("estimate.default deprecation warnings") + +make_glm <- function() { + set.seed(1) + d <- data.frame( + y = rbinom(100, 1, 0.5), + x = rnorm(100), + z = rnorm(100) + ) + glm(y ~ x + z, data = d, family = binomial()) +} + +test_that("estimate(..., null=) emits deprecation warning", { + g <- make_glm() + expect_warning(estimate(g, null = 0), class = "deprecatedWarning") +}) + +test_that("estimate(..., null=0) warns even when null=0 (Q7)", { + g <- make_glm() + expect_warning(estimate(g, null = 0), class = "deprecatedWarning") +}) + +test_that("estimate(..., contrast=) emits deprecation warning", { + g <- make_glm() + B <- rbind(c(1, 0, 0), c(0, 1, 0)) + expect_warning(estimate(g, contrast = B), class = "deprecatedWarning") +}) + +test_that("estimate(..., type=) emits deprecation warning", { + g <- make_glm() + expect_warning(estimate(g, type = "df"), class = "deprecatedWarning") +}) + +test_that("estimate(..., var.adj=) emits deprecation warning", { + g <- make_glm() + expect_warning( + estimate(g, type = "hc3", var.adj = 0.5), + class = "deprecatedWarning" + ) +}) + +test_that("multiple deprecated args produce exactly one deprecation warning", { + g <- make_glm() + ws <- capture_warnings(estimate(g, null = 1, type = "df")) + dep_msgs <- grep("deprecated", ws, value = TRUE) + expect_equal(length(dep_msgs), 1L) +}) + +test_that("internally-derived contrast from f does NOT warn (Q8)", { + g <- make_glm() + ws <- capture_warnings(estimate(g, "x")) + expect_equal(length(grep("deprecated", ws)), 0L) + ws <- capture_warnings(estimate(g, 2:3)) + expect_equal(length(grep("deprecated", ws)), 0L) + ws <- capture_warnings(estimate(g, rbind(c(1, 0, 0), c(0, 1, 0)))) + expect_equal(length(grep("deprecated", ws)), 0L) +}) + +test_that("plain estimate() emits no deprecation warning (Q4)", { + g <- make_glm() + ws <- capture_warnings(estimate(g)) + expect_equal(length(grep("deprecated", ws)), 0L) +}) diff --git a/tests/testthat/test-estimate-summary-equivalence.R b/tests/testthat/test-estimate-summary-equivalence.R new file mode 100644 index 00000000..ad6f11b2 --- /dev/null +++ b/tests/testthat/test-estimate-summary-equivalence.R @@ -0,0 +1,91 @@ +## Equivalence tests: estimate(g, X=...) should produce the same coefmat/vcov +## as summary(estimate(g), X=...) during the soft-deprecation window. + +context("estimate / summary equivalence for migrated args") + +make_glm <- function() { + set.seed(1) + d <- data.frame( + y = rbinom(100, 1, 0.5), + x = rnorm(100), + z = rnorm(100) + ) + glm(y ~ x + z, data = d, family = binomial()) +} + +## Helper: get coefmat from either deprecated or new path +deprecated_estimate <- function(...) { + suppressWarnings(estimate(...)) +} + +test_that("null= gives identical coefmat via estimate() and summary()", { + g <- make_glm() + e1 <- deprecated_estimate(g, null = 1) + e2 <- summary(estimate(g), null = 1) + expect_equal(e1$coefmat, e2$coefmat) + expect_equal(unname(e1$vcov), unname(e2$vcov)) +}) + +test_that("contrast= gives identical coefmat via estimate() and summary()", { + g <- make_glm() + B <- rbind(c(1, 0, 0), c(0, 1, -1)) + e1 <- deprecated_estimate(g, contrast = B) + e2 <- summary(estimate(g), contrast = B) + expect_equal(e1$coefmat, e2$coefmat) + expect_equal(unname(e1$vcov), unname(e2$vcov)) +}) + +test_that("null + contrast gives identical results via estimate() and summary()", { + g <- make_glm() + B <- rbind(c(1, 0, 0), c(0, 1, -1)) + e1 <- deprecated_estimate(g, contrast = B, null = c(0, 0)) + e2 <- summary(estimate(g), contrast = B, null = c(0, 0)) + expect_equal(e1$coefmat, e2$coefmat) + expect_equal(unname(e1$vcov), unname(e2$vcov)) +}) + +test_that("type='df' gives identical coefmat via estimate() and summary()", { + g <- make_glm() + e1 <- deprecated_estimate(g, type = "df") + e2 <- summary(estimate(g), type = "df") + expect_equal(e1$coefmat, e2$coefmat) + expect_equal(unname(e1$vcov), unname(e2$vcov)) +}) + +test_that("type='df1' gives identical coefmat via estimate() and summary()", { + g <- make_glm() + e1 <- deprecated_estimate(g, type = "df1") + e2 <- summary(estimate(g), type = "df1") + expect_equal(e1$coefmat, e2$coefmat) +}) + +test_that("type='hc3' + var.adj gives identical results via estimate() and summary()", { + g <- make_glm() + e1 <- deprecated_estimate(g, type = "hc3", var.adj = 0.5) + e2 <- summary(estimate(g), type = "hc3", var.adj = 0.5) + expect_equal(e1$coefmat, e2$coefmat) + expect_equal(unname(e1$vcov), unname(e2$vcov)) +}) + +test_that("type + null gives identical results via estimate() and summary()", { + g <- make_glm() + e1 <- deprecated_estimate(g, type = "df", null = 0.2) + e2 <- summary(estimate(g), type = "df", null = 0.2) + expect_equal(e1$coefmat, e2$coefmat) +}) + +test_that("df + null gives identical results via estimate() and summary()", { + g <- make_glm() + ## Per Q11: df is stored on the estimate object and reused by summary(). + e1 <- deprecated_estimate(g, df = 10, null = 1) + e2 <- summary(estimate(g, df = 10), null = 1) + expect_equal(e1$coefmat, e2$coefmat) +}) + +test_that("df is stored on the estimate object", { + g <- make_glm() + e <- estimate(g, df = 7) + expect_equal(e$df, 7) + e0 <- estimate(g) + expect_null(e0$df) +}) diff --git a/tests/testthat/test-estimate_default.R b/tests/testthat/test-estimate_default.R index f82b4450..d6467d69 100644 --- a/tests/testthat/test-estimate_default.R +++ b/tests/testthat/test-estimate_default.R @@ -82,8 +82,8 @@ test_that("summary.estimate compared with estimate", { q <- compute_wald(B, coef(a3d), vcov(a3d), null) df <- attr(q, "df") e1 <- estimate(a3d, f=B) - e1b <- estimate(a3d, f=B, null=null) - e1a <- estimate(e1, f=diag(3), null=null) + e1b <- suppressWarnings(estimate(a3d, f=B, null=null)) + e1a <- suppressWarnings(estimate(e1, f=diag(3), null=null)) expect_equal(unname(df), unname(e1a$compare$parameter)) expect_equal(unname(df), unname(e1b$compare$parameter)) expect_true(abs(pchisq(q, df=df, lower.tail=FALSE) - e1a$compare$p.value)<1e-16) @@ -92,8 +92,8 @@ test_that("summary.estimate compared with estimate", { expect_true(abs(q - e1b$compare$statistic) < 1e-9) # function spec. e2 <- estimate(a3d, function(p) c(p[1]-p[2], p[2]-p[3], p[1]-p[3])) - e2b <- estimate(a3d, function(p) c(p[1]-p[2], p[2]-p[3], p[1]-p[3]), null=null) - e2a <- estimate(e2, f=diag(3), null=null) + e2b <- suppressWarnings(estimate(a3d, function(p) c(p[1]-p[2], p[2]-p[3], p[1]-p[3]), null=null)) + e2a <- suppressWarnings(estimate(e2, f=diag(3), null=null)) expect_equal(unname(df), unname(e2a$compare$parameter)) expect_equal(unname(df), unname(e2b$compare$parameter)) expect_true(abs(pchisq(q, df=df, lower.tail=FALSE) - e2a$compare$p.value)<1e-16) @@ -113,8 +113,8 @@ test_that("summary.estimate compared with estimate", { q <- compute_wald(B, coef(a3d), vcov(a3d), null) df <- attr(q, "df") e1 <- estimate(a3d, f=B) - e1b <- estimate(a3d, f=B, null=null) - e1a <- estimate(e1, f=diag(4), null=null) + e1b <- suppressWarnings(estimate(a3d, f=B, null=null)) + e1a <- suppressWarnings(estimate(e1, f=diag(4), null=null)) expect_equal(unname(df), unname(e1a$compare$parameter)) expect_equal(unname(df), unname(e1b$compare$parameter)) expect_true(abs(pchisq(q, df=df, lower.tail=FALSE) - e1a$compare$p.value)<1e-16) @@ -131,7 +131,7 @@ test_that("summary.estimate compared with estimate", { test_that("1D: Identity contrast, null = 0", { B <- matrix(1, nrow = 1, ncol = 1) null <- 0 - e <- estimate(a1, f = B, null = null) + e <- suppressWarnings(estimate(a1, f = B, null = null)) p <- coef(a1) S <- vcov(a1) expected <- compute_wald(B, p, S, null) @@ -141,7 +141,7 @@ test_that("1D: Identity contrast, null = 0", { test_that("1D: Identity contrast, null = 1", { B <- matrix(1, nrow = 1, ncol = 1) null <- 1 - e <- estimate(a1, f = B, null = null) + e <- suppressWarnings(estimate(a1, f = B, null = null)) p <- coef(a1) S <- vcov(a1) expected <- compute_wald(B, p, S, null) @@ -151,7 +151,7 @@ test_that("1D: Identity contrast, null = 1", { test_that("1D: Scalar contrast (scaling), null = 0", { B <- matrix(2, nrow = 1, ncol = 1) null <- 0 - e <- estimate(a1, f = B, null = null) + e <- suppressWarnings(estimate(a1, f = B, null = null)) p <- coef(a1) S <- vcov(a1) expected <- compute_wald(B, p, S, null) @@ -161,7 +161,7 @@ test_that("1D: Scalar contrast (scaling), null = 0", { test_that("2D: Identity contrast, null = c(0, 0)", { B <- diag(2) null <- c(0, 0) - e <- estimate(a, f = B, null = null) + e <- suppressWarnings(estimate(a, f = B, null = null)) p <- coef(a) S <- vcov(a) expected <- compute_wald(B, p, S, null) @@ -171,7 +171,7 @@ test_that("2D: Identity contrast, null = c(0, 0)", { test_that("2D: Identity contrast, null equals true coefs", { B <- diag(2) null <- c(1, 2) - e <- estimate(a, f = B, null = null) + e <- suppressWarnings(estimate(a, f = B, null = null)) p <- coef(a) S <- vcov(a) expected <- compute_wald(B, p, S, null) @@ -184,7 +184,7 @@ test_that("2D: Identity contrast, null equals true coefs", { test_that("2D: Identity contrast, null = c(3, 4)", { B <- diag(2) null <- c(3, 4) - e <- estimate(a, f = B, null = null) + e <- suppressWarnings(estimate(a, f = B, null = null)) p <- coef(a) S <- vcov(a) expected <- compute_wald(B, p, S, null) @@ -194,7 +194,7 @@ test_that("2D: Identity contrast, null = c(3, 4)", { test_that("2D: Difference contrast c(1,-1), null = 0", { B <- matrix(c(1, -1), nrow = 1) null <- 0 - e <- estimate(a, f = B, null = null) + e <- suppressWarnings(estimate(a, f = B, null = null)) p <- coef(a) S <- vcov(a) expected <- compute_wald(B, p, S, null) @@ -204,7 +204,7 @@ test_that("2D: Difference contrast c(1,-1), null = 0", { test_that("2D: Difference contrast c(1,-1), null = -1", { B <- matrix(c(1, -1), nrow = 1) null <- -1 # Testing H0: a1 - a2 = -1 (true, since 1 - 2 = -1) - e <- estimate(a, f = B, null = null) + e <- suppressWarnings(estimate(a, f = B, null = null)) p <- coef(a) S <- vcov(a) expected <- compute_wald(B, p, S, null) @@ -216,7 +216,7 @@ test_that("2D: Difference contrast c(1,-1), null = -1", { test_that("2D: Sum contrast c(1,1), null = 3", { B <- matrix(c(1, 1), nrow = 1) null <- 3 # True sum = 1 + 2 = 3 - e <- estimate(a, f = B, null = null) + e <- suppressWarnings(estimate(a, f = B, null = null)) p <- coef(a) S <- vcov(a) expected <- compute_wald(B, p, S, null) @@ -228,7 +228,7 @@ test_that("2D: Sum contrast c(1,1), null = 3", { test_that("2D: Scaling contrast, null = c(2, 6)", { B <- 2 * diag(2) null <- c(2, 6) # True: 2*c(1,2) = c(2,4), so null != true - e <- estimate(a, f = B, null = null) + e <- suppressWarnings(estimate(a, f = B, null = null)) p <- coef(a) S <- vcov(a) expected <- compute_wald(B, p, S, null) @@ -238,7 +238,7 @@ test_that("2D: Scaling contrast, null = c(2, 6)", { test_that("3D: Identity contrast, null = true coefs", { B <- diag(3) null <- c(1, 2, 3) - e <- estimate(a3d, f = B, null = null) + e <- suppressWarnings(estimate(a3d, f = B, null = null)) p <- coef(a3d) S <- vcov(a3d) expected <- compute_wald(B, p, S, null) @@ -249,7 +249,7 @@ test_that("3D: Identity contrast, null = true coefs", { test_that("3D: Identity contrast, null = c(0, 0, 0)", { B <- diag(3) null <- c(0, 0, 0) - e <- estimate(a3d, f = B, null = null) + e <- suppressWarnings(estimate(a3d, f = B, null = null)) p <- coef(a3d) S <- vcov(a3d) expected <- compute_wald(B, p, S, null) @@ -263,7 +263,7 @@ test_that("3D: Pairwise differences, null = c(-1, -2)", { 0, 1, -1 ), nrow = 2, byrow = TRUE) null <- c(-1, -1) - e <- estimate(a3d, f = B, null = null) + e <- suppressWarnings(estimate(a3d, f = B, null = null)) p <- coef(a3d) S <- vcov(a3d) expected <- compute_wald(B, p, S, null) @@ -275,7 +275,7 @@ test_that("3D: Single-row contrast, null = 6", { # H0: a1 + a2 + a3 = 6 (true: 1+2+3=6) B <- matrix(c(1, 1, 1), nrow = 1) null <- 6 - e <- estimate(a3d, f = B, null = null) + e <- suppressWarnings(estimate(a3d, f = B, null = null)) p <- coef(a3d) S <- vcov(a3d) expected <- compute_wald(B, p, S, null) @@ -286,7 +286,7 @@ test_that("3D: Single-row contrast, null = 6", { test_that("4D: Identity contrast, null equals true coefs", { B <- diag(4) null <- c(1, 2, 3, 4) - e <- estimate(a4d, f = B, null = null) + e <- suppressWarnings(estimate(a4d, f = B, null = null)) p <- coef(a4d) S <- vcov(a4d) expected <- compute_wald(B, p, S, null) @@ -301,7 +301,7 @@ test_that("4D: Non-square contrast (2x4), null = c(0, 0)", { 0, 0, 1, -1 ), nrow = 2, byrow = TRUE) null <- c(0, 0) - e <- estimate(a4d, f = B, null = null) + e <- suppressWarnings(estimate(a4d, f = B, null = null)) p <- coef(a4d) S <- vcov(a4d) expected <- compute_wald(B, p, S, null) @@ -315,7 +315,7 @@ test_that("4D: Non-square contrast, null equals true values", { 0, 0, 1, -1 ), nrow = 2, byrow = TRUE) null <- c(-1, -1) - e <- estimate(a4d, f = B, null = null) + e <- suppressWarnings(estimate(a4d, f = B, null = null)) p <- coef(a4d) S <- vcov(a4d) expected <- compute_wald(B, p, S, null) @@ -326,11 +326,11 @@ test_that("4D: Non-square contrast, null equals true values", { test_that("Degrees of freedom equals nrow(B)", { B <- diag(2) null <- c(0, 0) - e <- estimate(a, f = B, null = null) + e <- suppressWarnings(estimate(a, f = B, null = null)) expect_identical(unname(e$compare$parameter), nrow(B)) B <- matrix(c(1, -1, 0, 0, 1, -1), nrow = 2, byrow = TRUE) null <- c(0, 0) - e <- estimate(a3d, f = B, null = null) + e <- suppressWarnings(estimate(a3d, f = B, null = null)) expect_equal(unname(e$compare$parameter), nrow(B)) }) @@ -423,7 +423,7 @@ test_that("estimate.default parsedesign dispatch", { expect_equal(e$coefmat[, 1], 2.0) # Character contrasts combine with null hypothesis vector. - e <- estimate(a3d, "a1", "a1", null = c(0, 1)) + e <- suppressWarnings(estimate(a3d, "a1", "a1", null = c(0, 1))) expect_true(e$coefmat[1, "P-value"] != e$coefmat[2, "P-value"]) }) diff --git a/tests/testthat/test-summary-requires-IC.R b/tests/testthat/test-summary-requires-IC.R new file mode 100644 index 00000000..76ebf304 --- /dev/null +++ b/tests/testthat/test-summary-requires-IC.R @@ -0,0 +1,41 @@ +## summary.estimate() requires IC=TRUE on the input estimate when applying +## small-sample corrections (type / var.adj). Per Q5. + +context("summary.estimate IC requirement") + +make_glm <- function() { + set.seed(1) + d <- data.frame( + y = rbinom(100, 1, 0.5), + x = rnorm(100), + z = rnorm(100) + ) + glm(y ~ x + z, data = d, family = binomial()) +} + +test_that("summary(..., type=) errors when input estimate has IC=FALSE", { + g <- make_glm() + e <- estimate(g, IC = FALSE) + expect_error( + summary(e, type = "df"), + "IC=TRUE", + fixed = TRUE + ) +}) + +test_that("summary(..., var.adj=) errors when input estimate has IC=FALSE", { + g <- make_glm() + e <- estimate(g, IC = FALSE) + expect_error( + summary(e, type = "hc3", var.adj = 0.5), + "IC=TRUE", + fixed = TRUE + ) +}) + +test_that("summary(..., null=) works when input estimate has IC=FALSE", { + ## null/contrast do NOT require IC, only type/var.adj do. + g <- make_glm() + e <- estimate(g, IC = FALSE) + expect_silent(summary(e, null = 1)) +}) diff --git a/vignettes/influencefunction.Rmd b/vignettes/influencefunction.Rmd index 75dfd81e..c4126a55 100644 --- a/vignettes/influencefunction.Rmd +++ b/vignettes/influencefunction.Rmd @@ -254,7 +254,7 @@ coef(e) ## ## Asymptotic (robust) variance estimate vcov(e) ## Matrix with estimates and confidence limits -estimate(e, null=0, level = 0.99) |> parameter() +estimate(e, level = 0.99) |> parameter() ## Influence curve IC(e) |> head() ## Join estimates @@ -445,7 +445,7 @@ As we have access to the joint asymptotic distribution we can for example test for whether the odds-ratio is the same for the two responses: ```{r hypo1} -estimate(e, cbind(0,1,0,-1), null=0) +summary(estimate(e), contrast=cbind(0,1,0,-1), null=0) ``` More details an be found in the Section on [hypothesis testing](#linear-contrasts-and-hypothesis-testing). @@ -855,7 +855,7 @@ estimate(gg, B) The $\bm{b}_0$ vector (default assumed to be zero) can be specified via the `null` argument ```{r estcontrast1} -estimate(gg, B, null=1) +summary(estimate(gg, B), null=1) ``` For testing multiple hypotheses we use that @@ -873,7 +873,7 @@ estimate(gg, B) Such linear statistics can also be specified directly as expressions of the parameter names ```{r} -estimate(gg, a + a.1, 2*a - a.2, a, null=c(2,1,1)) +summary(estimate(gg, a + a.1, 2*a - a.2, a), null=c(2,1,1)) ``` We refer to the function `lava::contr` and `lava::parsedesigns` for defining contrast matrices. @@ -885,7 +885,7 @@ comparisons of different exposure estimates: ```{r pairwise.diff} pairwise.diff(3) -estimate(gg, pairwise.diff(3), null=c(1,1,1), use=c(2,4,6)) +summary(estimate(gg, pairwise.diff(3), use=c(2,4,6)), null=c(1,1,1)) ``` When conducting multiple tests each at a nominal-level of $\alpha$ the overall @@ -913,7 +913,7 @@ in [@RitzPipper_multcomp] the joint distribution of $Z_{1},\ldots,Z_{p}$ can be estimated from the IFs. This is implemented in the `alpha_zmax` method ```{r pcorrect, eval=has_mets } -gg0 <- estimate(gg, use="^a", regex=TRUE, null=rep(.8, 3)) +gg0 <- suppressWarnings(estimate(gg, use="^a", regex=TRUE, null=rep(.8, 3))) alpha_zmax(gg0) ```