From a7022a527f6bb162f4fa43989448d747216a47fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Mon, 15 Jun 2026 16:23:06 +0200 Subject: [PATCH 01/16] Init --- NAMESPACE | 1 + R/annotate_gg_km.R | 51 ++-- R/get_surv_times_df.R | 90 +++++++ R/tbl_survfit_times.R | 281 ++++++++++----------- _pkgdown.yml | 1 + man/add_overall.tbl_survfit_times.Rd | 24 ++ man/get_surv_times_df.Rd | 50 ++++ man/tbl_survfit_times.Rd | 122 +++------ tests/testthat/_snaps/tbl_survfit_times.md | 81 ++---- tests/testthat/test-annotate_gg_km.R | 31 ++- tests/testthat/test-get_surv_times_df.R | 72 ++++++ tests/testthat/test-tbl_survfit_times.R | 143 +++++------ 12 files changed, 539 insertions(+), 408 deletions(-) create mode 100644 R/get_surv_times_df.R create mode 100644 man/add_overall.tbl_survfit_times.Rd create mode 100644 man/get_surv_times_df.Rd create mode 100644 tests/testthat/test-get_surv_times_df.R diff --git a/NAMESPACE b/NAMESPACE index 9c7147e52..18f42c9e8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -28,6 +28,7 @@ export(g_lineplot) export(g_lineplot_table) export(get_cox_pairwise_df) export(get_mmrm_results) +export(get_surv_times_df) export(gg_km) export(gg_lineplot) export(gg_mmrm_lineplot) diff --git a/R/annotate_gg_km.R b/R/annotate_gg_km.R index ca4b56e16..5144998a0 100644 --- a/R/annotate_gg_km.R +++ b/R/annotate_gg_km.R @@ -12,8 +12,11 @@ #' require a pure `ggplot2` object. #' @param fit_km (`survfit`)\cr #' A fitted Kaplan-Meier object of class `survfit` (from the `survival` -#' package). This object contains the necessary survival data used to -#' calculate and generate the content displayed in the annotation table. +#' package). This object contains the necessary survival data used by +#' `annotate_riskdf()` to calculate and generate the content displayed. +#' @param surv_tbl (`data.frame`)\cr +#' A data frame containing the pre-calculated survival summary results, +#' such as the output from `get_surv_times_df()`. #' @param coxph_tbl (`data.frame`)\cr #' A data frame containing the pre-calculated Cox-PH results, derived #' using function `get_cox_pairwise_df()`. @@ -102,10 +105,6 @@ annotate_riskdf <- function(gg_plt, paste0( "`gg_plt` must be a pure ggplot object (not a cowplot object) for", "`annotate_riskdf`." - ), - "i" = paste0( - "cowplot objects are not supported because", - "exact X-axis extraction is required." ) ) } @@ -185,18 +184,19 @@ annotate_riskdf <- function(gg_plt, } #' @describeIn annotate_gg_km The `annotate_surv_med` function adds a -#' median survival time summary table as an annotation box. +#' survival summary table as an annotation box. #' #' @return The function `annotate_surv_med` returns a `cowplot` object\cr -#' with the median survival table annotation added. +#' with the survival table annotation added. #' #' @examples -#' # Annotate Kaplan-Meier Plot with Median Survival Table -#' annotate_surv_med(plt_kmg01, fit_kmg01) +#' # Annotate Kaplan-Meier Plot with Survival Times Table +#' surv_df <- get_surv_times_df(fit_kmg01, times = c(100, 200)) +#' annotate_surv_med(plt_kmg01, surv_tbl = surv_df) #' #' @export annotate_surv_med <- function(gg_plt, - fit_km, + surv_tbl, table_position = c( x = 0.8, y = 0.85, @@ -204,8 +204,6 @@ annotate_surv_med <- function(gg_plt, h = 0.16 ), ...) { - set_cli_abort_call() - default_eargs <- list( font_size = 10, fill = TRUE @@ -214,27 +212,18 @@ annotate_surv_med <- function(gg_plt, eargs <- list(...) eargs <- utils::modifyList(default_eargs, eargs) - # Check explicitly allows cowplot objects for floating tables if (!inherits(gg_plt, c("gg", "ggplot", "cowplot"))) { rlang::abort("`gg_plt` must be a ggplot or cowplot object.") } - if (!inherits(fit_km, "survfit")) { - rlang::abort("`fit_km` must be a survfit object.") + if (!inherits(surv_tbl, "data.frame")) { + rlang::abort("`surv_tbl` must be a data.frame.") } - - strata_levels <- if (is.null(fit_km$strata)) "All" else levels(fit_km$strata) - - surv_med_tbl <- h_tbl_median_surv( - fit_km = fit_km, - strata_levels = strata_levels - ) - - if (!identical(rownames(surv_med_tbl), as.character(seq_len(nrow(surv_med_tbl))))) { - surv_med_tbl <- data.frame( - " " = rownames(surv_med_tbl), - surv_med_tbl, + if (!identical(rownames(surv_tbl), as.character(seq_len(nrow(surv_tbl))))) { + surv_tbl <- data.frame( + " " = rownames(surv_tbl), + surv_tbl, check.names = FALSE ) } @@ -243,7 +232,7 @@ annotate_surv_med <- function(gg_plt, # Call the floating table engine res <- df2gg_floating( - df = surv_med_tbl, + df = surv_tbl, gg_plt = gg_plt, x = table_position["x"], y = table_position["y"], @@ -260,7 +249,6 @@ annotate_surv_med <- function(gg_plt, #' @describeIn annotate_gg_km The function `annotate_coxph()` adds a Cox #' Proportional Hazards summary table as an annotation box. #' -#' #' @return The function `annotate_coxph` returns a `cowplot` object\cr #' with the Cox-PH table annotation added. #' @@ -285,8 +273,6 @@ annotate_coxph <- function(gg_plt, h = 0.125 ), ...) { - set_cli_abort_call() - default_eargs <- list( fill = TRUE, font_size = 10 @@ -295,7 +281,6 @@ annotate_coxph <- function(gg_plt, eargs <- list(...) eargs <- utils::modifyList(default_eargs, eargs) - # Check explicitly allows cowplot objects for floating tables if (!inherits(gg_plt, c("gg", "ggplot", "cowplot"))) { rlang::abort("`gg_plt` must be a ggplot or cowplot object.") } diff --git a/R/get_surv_times_df.R b/R/get_surv_times_df.R new file mode 100644 index 000000000..d67939014 --- /dev/null +++ b/R/get_surv_times_df.R @@ -0,0 +1,90 @@ +#' Generate Table of Survival Estimates at Specific Times +#' +#' @description +#' This function extracts survival probabilities, confidence intervals, and +#' numbers at risk from a `survfit` object at specified time points. It returns +#' a formatted `data.frame` that can be manipulated before passing to +#' `tbl_surv_times()`. +#' +#' @param fit_km (`survfit`)\cr +#' A fitted Kaplan-Meier object of class `survfit`. +#' @param times (`numeric`)\cr +#' A numeric vector of time points at which to evaluate survival estimates. +#' @param conf_int (`numeric`)\cr +#' The confidence level to use for the intervals. Defaults to `0.95`. +#' @param scale (`numeric`)\cr +#' A scaling factor for the survival estimates. For example, `100` converts +#' probabilities to percentages. Defaults to `1`. +#' +#' @return A `data.frame` with columns for `Strata`, `Time`, `N at Risk`, +#' `Survival`, and `XX% CI`. +#' +#' @examples +#' library(survival) +#' surv_data <- lung +#' surv_data$status <- surv_data$status - 1 +#' +#' # Example: Handling advanced arguments natively in survfit. +#' # Instead of passing `method.args`, apply them directly to survfit(). +#' # Here we set a 99% CI and specify a cluster ID (inst). +#' fit_complex <- survfit( +#' Surv(time, status) ~ sex, +#' data = surv_data, +#' id = inst, +#' conf.int = 0.99 +#' ) +#' +#' get_surv_times_df(fit_complex, times = c(100, 200), conf_int = 0.99) +#' +#' @export +get_surv_times_df <- function(fit_km, times, conf_int = 0.95, scale = 1) { + # Enforce rigorous type-checking using rlang + if (!inherits(fit_km, "survfit")) { + rlang::abort("`fit_km` must be a survfit object.") + } + + if (!is.numeric(times) || length(times) == 0) { + rlang::abort("`times` must be a non-empty numeric vector.") + } + + # Extract summary at specific times, extending to last known survival + # handles times beyond the max observed time gracefully. + summ <- summary(fit_km, times = times, extend = TRUE) + + strata_levels <- if (!is.null(summ$strata)) { + as.character(summ$strata) + } else { + "All" + } + + # Remove variable prefix from strata to maintain clean downstream headers + # without cluttering tables with 'arm=A', 'arm=B' etc. + if (!is.null(fit_km$strata)) { + strata_lst <- strsplit(sub("=", "equals", strata_levels), "equals") + strata_levels <- vapply( + strata_lst, + FUN = function(x) x[2], + FUN.VALUE = character(1) + ) + } + + df <- data.frame( + Strata = strata_levels, + Time = summ$time, + `N at Risk` = summ$n.risk, + Survival = sprintf("%.2f", summ$surv * scale), + CI = paste0( + "(", sprintf("%.2f", summ$lower * scale), + ", ", sprintf("%.2f", summ$upper * scale), ")" + ), + stringsAsFactors = FALSE, + check.names = FALSE + ) + + # Rename the general 'CI' column to reflect the exact interval used + # giving users an accurate header prior to any manual renaming steps + ci_col_name <- paste0(conf_int * 100, "% CI") + names(df)[names(df) == "CI"] <- ci_col_name + + df +} diff --git a/R/tbl_survfit_times.R b/R/tbl_survfit_times.R index 8725f6986..8d732d65c 100644 --- a/R/tbl_survfit_times.R +++ b/R/tbl_survfit_times.R @@ -1,169 +1,156 @@ -#' Survival Times +#' Create a gtsummary Table of Survival Times #' -#' Create a gtsummary table with Kaplan-Meier estimated survival estimates -#' and specified times. +#' @description +#' Generates a `gtsummary` table from the survival times `data.frame` created by +#' `get_surv_times_df()`. The table dynamically adapts to whichever columns the +#' user leaves in the data frame, allowing for easy customization. #' -#' @inheritParams tbl_survfit_quantiles -#' @inheritParams cardx::ard_survival_survfit -#' @inheritParams gtsummary::add_overall.tbl_summary -#' @param label (`string`)\cr -#' Label to appear in the header row. Default is `"Time {time}"`, where -#' the glue syntax injects the time estimate into the label. -#' @param statistic (`character`)\cr -#' Character vector of the statistics to report. -#' May use any of the following statistics: -#' `c(n.risk, estimate, std.error, conf.low, conf.high)`, -#' Default is `c("{n.risk}", "{estimate}", "({conf.low}, {conf.high})")` +#' @param surv_df (`data.frame`)\cr +#' The results `data.frame` generated by `get_surv_times_df()`. #' -#' Statistics available to include when using `add_difference_row()` are: -#' `"estimate"`, `"std.error"`, `"statistic"`, `"conf.low"`, `"conf.high"`, `"p.value"`. -#' @param estimate_fun (`function`) \cr -#' Function used to style/round the `c(estimate, conf.low, conf.high)` statistics. -#' @param x (`tbl_survfit_times`)\cr -#' A stratified 'tbl_survfit_times' object +#' @return A `gtsummary` object. #' -#' @details -#' When the `statistic` argument is modified, the statistic labels will likely -#' also need to be updated. To change the label, call the `modify_table_body()` -#' function to directly update the underlying `x$table_body` data frame. +#' @examples +#' library(survival) +#' surv_data <- lung +#' surv_data$status <- surv_data$status - 1 #' -#' @returns a gtsummary table -#' @name tbl_survfit_times -#' @order 1 +#' # Example: Replicating "add_overall" functionality. +#' # 1. Fit the stratified model and extract data +#' fit_strat <- survfit(Surv(time, status) ~ sex, data = surv_data) +#' df_strat <- get_surv_times_df(fit_strat, times = c(100, 200)) +#' +#' # 2. Fit the overall unstratified model and extract data +#' fit_overall <- survfit(Surv(time, status) ~ 1, data = surv_data) +#' df_overall <- get_surv_times_df(fit_overall, times = c(100, 200)) +#' +#' # 3. Rename the unstratified label to "Overall" (instead of "All") +#' df_overall$Strata <- "Overall" +#' +#' # 4. Combine rows and render the single stacked table +#' combined_df <- rbind(df_overall, df_strat) +#' tbl_survfit_times(combined_df) #' -#' @examples -#' # Example 1 ---------------------------------- -#' tbl_survfit_times( -#' data = cards::ADTTE, -#' by = "TRTA", -#' times = c(30, 60), -#' label = "Day {time}" -#' ) |> -#' add_overall() -NULL - -#' @rdname tbl_survfit_times #' @export -#' @order 2 -tbl_survfit_times <- function(data, - times, - y = "survival::Surv(time = AVAL, event = 1 - CNSR, type = 'right', origin = 0)", - by = NULL, - label = "Time {time}", - statistic = c("{n.risk}", "{estimate}", "({conf.low}, {conf.high})"), - estimate_fun = label_roche_number(digits = 1, scale = 100), - method.args = list(conf.int = 0.95, conf.type = "plain")) { - # check inputs --------------------------------------------------------------- - method.args <- enquo(method.args) - set_cli_abort_call() - check_not_missing(data) - check_not_missing(times) - check_data_frame(data) - check_numeric(times) - check_range(times, range = c(0, Inf)) - check_string(label) - check_class(statistic, "character") - check_class(estimate_fun, "function") - cards::process_selectors(data, by = {{ by }}) - check_scalar(by, allow_empty = TRUE) - if ("time" %in% by) { - cli::cli_abort( - "The {.arg by} column cannot be named {.val time}.", - call = get_cli_abort_call() - ) +tbl_survfit_times <- function(surv_df) { + if (!is.data.frame(surv_df)) { + rlang::abort("`surv_df` must be a data.frame.") + } + + if (!"Strata" %in% names(surv_df) || !"Time" %in% names(surv_df)) { + rlang::abort("`surv_df` must contain at least 'Strata' and 'Time' columns.") } - y <- .expr_as_string({{ y }}) # convert y to string (if not already) - func_inputs <- as.list(environment()) + ci_col <- grep("% CI", names(surv_df), value = TRUE) + has_ci <- length(ci_col) > 0 - # subset data on complete row ------------------------------------------------ - form <- glue("{y} ~ {ifelse(is_empty(by), 1, cardx::bt(by))}") |> stats::as.formula() - data <- data[stats::complete.cases(data[all.vars(form)]), ] + unique_strata <- unique(surv_df$Strata) - # get survival quantiles ----------------------------------------------------- - ard_surv_times <- - cardx::ard_survival_survfit( - x = data, - y = y, - variables = any_of(by), - times = times, - method.args = !!method.args - ) |> - cards::update_ard_fmt_fun( - stat_names = c("estimate", "conf.low", "conf.high"), - fmt_fun = estimate_fun - ) |> - cards::update_ard_fmt_fun( - stat_names = c("n.risk", "n.censor", "cum.risk", "cum.censor"), - fmt_fun = label_roche_number() + if (length(unique_strata) == 1) { + res <- .get_single_time_table( + surv_df, + ci_col = if (has_ci) ci_col[1] else NULL ) - - # calculate ARD for by vars - if (!is_empty(by)) { - ard_by <- cards::ard_tabulate(data, variables = all_of(by)) + } else { + res <- surv_df |> + gtsummary::tbl_strata( + strata = "Strata", + .combine_with = "tbl_stack", + .header = "{strata}", + .tbl_fun = function(x) { + .get_single_time_table( + x, + ci_col = if (has_ci) ci_col[1] else NULL + ) + } + ) |> + gtsummary::modify_header(groupname_col = " ") } - ard_n <- cards::ard_total_n(data) - # get the confidence level - conf.level <- - ard_surv_times |> - dplyr::filter(.data$stat_name == "conf.level") |> - dplyr::pull("stat") |> - unlist() + res <- res |> gtsummary::modify_header(label = "Time") + + # Ensure the object receives the correct class for S3 dispatch + class(res) <- c("tbl_survfit_times", class(res)) + + res +} + +#' Build Single Strata Table for Survival Times +#' @keywords internal +#' @noRd +.get_single_time_table <- function(data_subset, ci_col = NULL) { + data_subset$Time <- paste("Day", data_subset$Time) - # build gtsummary table ------------------------------------------------------ - tbl <- - dplyr::bind_rows( - ard_surv_times |> - # remove model-wide stats - dplyr::filter(.data$variable == "time") |> - dplyr::mutate( - variable = paste0(.data$variable, unlist(.data$variable_level)), - variable_level = NULL - ), - case_switch(!is_empty(by) ~ ard_by), - ard_n + vars_include <- c() + label_list <- list() + + if ("N at Risk" %in% names(data_subset)) { + vars_include <- c(vars_include, "N at Risk") + label_list <- c(label_list, list(`N at Risk` ~ "N at Risk")) + } + if ("Survival" %in% names(data_subset)) { + vars_include <- c(vars_include, "Survival") + label_list <- c(label_list, list(Survival ~ "Survival")) + } + if (!is.null(ci_col) && ci_col %in% names(data_subset)) { + vars_include <- c(vars_include, ci_col) + label_list <- c(label_list, stats::setNames(list(ci_col), ci_col)) + } + + res <- data_subset |> + cards::ard_mvsummary( + variables = dplyr::all_of(vars_include), + by = "Time", + statistic = ~ list(my_stat = \(x, ...) x[1]) ) |> gtsummary::tbl_ard_summary( - by = any_of(by), - type = starts_with("time") ~ "continuous2", - statistic = starts_with("time") ~ statistic, - label = - map(times, ~ glue::glue_data(list(time = .x), label)) |> - set_names(paste0("time", times)) + by = "Time", + type = list(gtsummary::everything() ~ "continuous"), + statistic = list(gtsummary::everything() ~ "{my_stat}"), + label = label_list, + missing = "no" ) |> - gtsummary::modify_header( - gtsummary::all_stat_cols() ~ "{level} \n(N = {n})", - label = "" - ) |> - gtsummary::modify_table_body( - ~ .x |> - dplyr::mutate( - label = dplyr::case_when( - .data$label == "Number of Subjects at Risk" ~ "Patients remaining at risk", - .data$label == "Survival Probability" ~ "Event Free Rate (%)", - .data$label == "(CI Lower Bound, CI Upper Bound)" ~ glue("{style_roche_number(conf.level, scale = 100)}% CI"), - .default = .data$label - ) - ) - ) + gtsummary::modify_header(gtsummary::all_stat_cols() ~ " ") |> + gtsummary::modify_footnote(gtsummary::everything() ~ NA) - # return tbl ----------------------------------------------------------------- - tbl$cards <- - list( - tbl_survfit_times = - dplyr::bind_rows( - ard_surv_times, - if (!is_empty(by)) ard_by, # styler: off - ard_n - ) - ) - tbl[["call_list"]] <- list(tbl_survfit_times = match.call()) - tbl$inputs <- func_inputs - tbl |> - structure(class = c("tbl_survfit_times", "gtsummary")) + if (!is.null(ci_col)) { + res <- res |> + gtsummary::modify_indent( + columns = "label", + rows = .data$variable == ci_col, + indent = 4L + ) + } + + res } +#' Add Overall Column to tbl_survfit_times (Legacy) +#' +#' @description +#' This function is maintained as a legacy method to provide a clear migration +#' path for existing scripts. Because the architecture of `tbl_survfit_times()` +#' was decoupled to separate data extraction from table rendering, it is no +#' longer possible to automatically compute unstratified overall statistics +#' from the rendered table object. +#' +#' @param x (`tbl_survfit_times`)\cr +#' A table object generated by `tbl_survfit_times()`. +#' @param ... Additional arguments passed to other methods. +#' +#' @return Defunct; throws an error with migration instructions. +#' +#' @method add_overall tbl_survfit_times #' @export -#' @rdname tbl_survfit_times -add_overall.tbl_survfit_times <- add_overall.tbl_survfit_quantiles +add_overall.tbl_survfit_times <- function(x, ...) { + rlang::abort( + paste( + "`add_overall()` is defunct for `tbl_survfit_times`.", + "Since we decoupled the architecture, a user wanting an overall column", + "alongside stratified columns should simply fit an unstratified model,", + "extract the dataframe, and use standard `dplyr::bind_rows()` with", + "their stratified dataframe before pushing the combined output into", + "`tbl_survfit_times()`." + ) + ) +} diff --git a/_pkgdown.yml b/_pkgdown.yml index caa3260f2..0a943e94d 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -46,6 +46,7 @@ reference: - gg_km - annotate_gg_km - get_cox_pairwise_df + - get_surv_times_df - title: "Forest Plot" contents: diff --git a/man/add_overall.tbl_survfit_times.Rd b/man/add_overall.tbl_survfit_times.Rd new file mode 100644 index 000000000..a6c0c3242 --- /dev/null +++ b/man/add_overall.tbl_survfit_times.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tbl_survfit_times.R +\name{add_overall.tbl_survfit_times} +\alias{add_overall.tbl_survfit_times} +\title{Add Overall Column to tbl_survfit_times (Legacy)} +\usage{ +\method{add_overall}{tbl_survfit_times}(x, ...) +} +\arguments{ +\item{x}{(\code{tbl_survfit_times})\cr +A table object generated by \code{tbl_survfit_times()}.} + +\item{...}{Additional arguments passed to other methods.} +} +\value{ +Defunct; throws an error with migration instructions. +} +\description{ +This function is maintained as a legacy method to provide a clear migration +path for existing scripts. Because the architecture of \code{tbl_survfit_times()} +was decoupled to separate data extraction from table rendering, it is no +longer possible to automatically compute unstratified overall statistics +from the rendered table object. +} diff --git a/man/get_surv_times_df.Rd b/man/get_surv_times_df.Rd new file mode 100644 index 000000000..80a8c91ae --- /dev/null +++ b/man/get_surv_times_df.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_surv_times_df.R +\name{get_surv_times_df} +\alias{get_surv_times_df} +\title{Generate Table of Survival Estimates at Specific Times} +\usage{ +get_surv_times_df(fit_km, times, conf_int = 0.95, scale = 1) +} +\arguments{ +\item{fit_km}{(\code{survfit})\cr +A fitted Kaplan-Meier object of class \code{survfit}.} + +\item{times}{(\code{numeric})\cr +A numeric vector of time points at which to evaluate survival estimates.} + +\item{conf_int}{(\code{numeric})\cr +The confidence level to use for the intervals. Defaults to \code{0.95}.} + +\item{scale}{(\code{numeric})\cr +A scaling factor for the survival estimates. For example, \code{100} converts +probabilities to percentages. Defaults to \code{1}.} +} +\value{ +A \code{data.frame} with columns for \code{Strata}, \code{Time}, \verb{N at Risk}, +\code{Survival}, and \verb{XX\% CI}. +} +\description{ +This function extracts survival probabilities, confidence intervals, and +numbers at risk from a \code{survfit} object at specified time points. It returns +a formatted \code{data.frame} that can be manipulated before passing to +\code{tbl_surv_times()}. +} +\examples{ +library(survival) +surv_data <- lung +surv_data$status <- surv_data$status - 1 + +# Example: Handling advanced arguments natively in survfit. +# Instead of passing `method.args`, apply them directly to survfit(). +# Here we set a 99\% CI and specify a cluster ID (inst). +fit_complex <- survfit( + Surv(time, status) ~ sex, + data = surv_data, + id = inst, + conf.int = 0.99 +) + +get_surv_times_df(fit_complex, times = c(100, 200), conf_int = 0.99) + +} diff --git a/man/tbl_survfit_times.Rd b/man/tbl_survfit_times.Rd index c172dcf30..604aa7fd2 100644 --- a/man/tbl_survfit_times.Rd +++ b/man/tbl_survfit_times.Rd @@ -1,22 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tbl_survfit_times.R, R/add_difference_row.R -\name{tbl_survfit_times} -\alias{tbl_survfit_times} +% Please edit documentation in R/add_difference_row.R, R/tbl_survfit_times.R +\name{add_difference_row.tbl_survfit_times} \alias{add_difference_row.tbl_survfit_times} -\alias{add_overall.tbl_survfit_times} -\title{Survival Times} +\alias{tbl_survfit_times} +\title{Create a gtsummary Table of Survival Times} \usage{ -tbl_survfit_times( - data, - times, - y = "survival::Surv(time = AVAL, event = 1 - CNSR, type = 'right', origin = 0)", - by = NULL, - label = "Time {time}", - statistic = c("{n.risk}", "{estimate}", "({conf.low}, {conf.high})"), - estimate_fun = label_roche_number(digits = 1, scale = 100), - method.args = list(conf.int = 0.95, conf.type = "plain") -) - \method{add_difference_row}{tbl_survfit_times}( x, reference, @@ -27,55 +15,11 @@ tbl_survfit_times( ... ) -\method{add_overall}{tbl_survfit_times}( - x, - last = FALSE, - col_label = "All Participants \\nN = {style_roche_number(N)}", - ... -) +tbl_survfit_times(surv_df) } \arguments{ -\item{data}{(\code{data.frame})\cr -A data frame} - -\item{times}{(\code{numeric})\cr -a vector of times for which to return survival probabilities.} - -\item{y}{(\code{string} or \code{expression})\cr -A string or expression with the survival outcome, e.g. \code{survival::Surv(time, status)}. -The default value is -\code{survival::Surv(time = AVAL, event = 1 - CNSR, type = "right", origin = 0)}.} - -\item{by}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr -A single column from \code{data}. Summary statistics will be stratified by this variable. -Default is \code{NULL}, which returns results for the unstratified model.} - -\item{label}{(\code{string})\cr -Label to appear in the header row. Default is \code{"Time {time}"}, where -the glue syntax injects the time estimate into the label.} - -\item{statistic}{(\code{character})\cr -Character vector of the statistics to report. -May use any of the following statistics: -\code{c(n.risk, estimate, std.error, conf.low, conf.high)}, -Default is \code{c("{n.risk}", "{estimate}", "({conf.low}, {conf.high})")} - -Statistics available to include when using \code{add_difference_row()} are: -\code{"estimate"}, \code{"std.error"}, \code{"statistic"}, \code{"conf.low"}, \code{"conf.high"}, \code{"p.value"}.} - -\item{estimate_fun}{(\code{function}) \cr -Function used to style/round the \code{c(estimate, conf.low, conf.high)} statistics.} - -\item{method.args}{(named \code{list})\cr -Named list of arguments that will be passed to \code{survival::survfit()}. - -Note that this list may contain non-standard evaluation components, and -must be handled similarly to tidyselect inputs by using -rlang's embrace operator \code{{{ . }}} or \code{!!enquo()} when programming with this -function.} - -\item{x}{(\code{tbl_survfit_times})\cr -A stratified 'tbl_survfit_times' object} +\item{x}{(\code{tbl_summary})\cr +table created with \code{tbl_summary()}} \item{reference}{(\code{string})\cr Value of the \code{tbl_survfit_times(by)} variable value that is the reference for @@ -90,26 +34,22 @@ Function to round and format the \code{p.value} statistic. Default is \code{\lin The function must have a numeric vector input, and return a string that is the rounded/formatted p-value (e.g. \code{pvalue_fun = label_style_pvalue(digits = 3)}).} -\item{...}{These dots are for future extensions and must be empty.} +\item{estimate_fun}{(\code{\link[gtsummary:syntax]{formula-list-selector}})\cr +List of formulas specifying the functions +to round and format differences and confidence limits.} -\item{last}{(scalar \code{logical})\cr -Logical indicator to display overall column last in table. -Default is \code{FALSE}, which will display overall column first.} +\item{...}{These dots are for future extensions and must be empty.} -\item{col_label}{(\code{string})\cr -String indicating the column label. Default is \code{"**Overall** \nN = {style_number(N)}"}} +\item{surv_df}{(\code{data.frame})\cr +The results \code{data.frame} generated by \code{get_surv_times_df()}.} } \value{ -a gtsummary table +A \code{gtsummary} object. } \description{ -Create a gtsummary table with Kaplan-Meier estimated survival estimates -and specified times. -} -\details{ -When the \code{statistic} argument is modified, the statistic labels will likely -also need to be updated. To change the label, call the \code{modify_table_body()} -function to directly update the underlying \code{x$table_body} data frame. +Generates a \code{gtsummary} table from the survival times \code{data.frame} created by +\code{get_surv_times_df()}. The table dynamically adapts to whichever columns the +user leaves in the data frame, allowing for easy customization. } \section{Methods (by generic)}{ \itemize{ @@ -127,14 +67,6 @@ Pairwise differences are calculated relative to the specified \code{by} variable }} \examples{ -# Example 1 ---------------------------------- -tbl_survfit_times( - data = cards::ADTTE, - by = "TRTA", - times = c(30, 60), - label = "Day {time}" -) |> - add_overall() # Example 2 - Survival Differences ----------- tbl_survfit_times( data = cards::ADTTE, @@ -143,4 +75,24 @@ tbl_survfit_times( label = "Day {time}" ) |> add_difference_row(reference = "Placebo") +library(survival) +surv_data <- lung +surv_data$status <- surv_data$status - 1 + +# Example: Replicating "add_overall" functionality. +# 1. Fit the stratified model and extract data +fit_strat <- survfit(Surv(time, status) ~ sex, data = surv_data) +df_strat <- get_surv_times_df(fit_strat, times = c(100, 200)) + +# 2. Fit the overall unstratified model and extract data +fit_overall <- survfit(Surv(time, status) ~ 1, data = surv_data) +df_overall <- get_surv_times_df(fit_overall, times = c(100, 200)) + +# 3. Rename the unstratified label to "Overall" (instead of "All") +df_overall$Strata <- "Overall" + +# 4. Combine rows and render the single stacked table +combined_df <- rbind(df_overall, df_strat) +tbl_survfit_times(combined_df) + } diff --git a/tests/testthat/_snaps/tbl_survfit_times.md b/tests/testthat/_snaps/tbl_survfit_times.md index d53c80f25..a889e6e7f 100644 --- a/tests/testthat/_snaps/tbl_survfit_times.md +++ b/tests/testthat/_snaps/tbl_survfit_times.md @@ -1,73 +1,34 @@ -# tbl_survfit_times() works +# tbl_survfit_times() works for stratified data Code as.data.frame(tbl) Output - Placebo \n(N = 86) Xanomeline High Dose \n(N = 84) Xanomeline Low Dose \n(N = 84) - 1 Time 30 - 2 Patients remaining at risk 69 38 42 - 3 Event Free Rate (%) 84.4 53.0 53.4 - 4 95% CI (76.7, 92.2) (41.7, 64.4) (42.3, 64.4) - 5 Time 60 - 6 Patients remaining at risk 59 14 20 - 7 Event Free Rate (%) 76.8 24.3 31.1 - 8 95% CI (67.7, 86.0) (13.9, 34.7) (20.2, 41.9) - ---- - - Code - as.data.frame(tbl) - Output - Drug A \n(N = 98) Drug B \n(N = 102) - 1 Time 12 - 2 Patients remaining at risk 89 88 - 3 Event Free Rate (%) 90.8 86.3 - 4 95% CI (85.1, 96.5) (79.6, 93.0) - 5 Time 15 - 6 Patients remaining at risk 83 75 - 7 Event Free Rate (%) 84.7 73.5 - 8 95% CI (77.6, 91.8) (65.0, 82.1) - ---- + Time + 1 Placebo N at Risk 69.0 59.0 + 2 Survival 0.84 0.77 + 3 95% CI (0.77, 0.93) (0.68, 0.87) + 4 Xanomeline High Dose N at Risk 38.0 14.0 + 5 Survival 0.53 0.24 + 6 95% CI (0.43, 0.66) (0.16, 0.37) + 7 Xanomeline Low Dose N at Risk 42.0 20.0 + 8 Survival 0.53 0.31 + 9 95% CI (0.43, 0.66) (0.22, 0.44) + +# tbl_survfit_times() works for unstratified data Code as.data.frame(tbl) Output - Overall \n(N = 254) - 1 Time 30 - 2 Patients remaining at risk 149 - 3 Event Free Rate (%) 64.1 - 4 90% CI (59.1, 69.4) - 5 Time 60 - 6 Patients remaining at risk 93 - 7 Event Free Rate (%) 45.7 - 8 90% CI (40.5, 51.5) - -# tbl_survfit_times(by) messaging - - Code - tbl_survfit_times(data = cards::ADTTE, by = everything(), times = 30) - Condition - Error in `tbl_survfit_times()`: - ! The `by` argument must be length 1 or empty. - ---- - - Code - tbl_survfit_times(data = dplyr::rename(gtsummary::trial, time = trt), by = "time", y = "survival::Surv(ttdeath, death)", - times = 30) - Condition - Error in `tbl_survfit_times()`: - ! The `by` column cannot be named "time". + Time + 1 N at Risk 149.0 93.0 + 2 Survival 0.64 0.46 + 3 90% CI (0.58, 0.70) (0.40, 0.53) -# add_overall.tbl_survfit_times() works +# tbl_survfit_times() adapts to user modifications (dropping columns) Code - as.data.frame(add_overall(tbl_survfit_times(data = cards::ADTTE, by = "TRTA", times = 30, label = "Day {time}"), last = TRUE, col_label = "**All Participants** \nN = {n}")) + tbl_df Output - Placebo \n(N = 86) Xanomeline High Dose \n(N = 84) Xanomeline Low Dose \n(N = 84) All Participants \nN = 254 - 1 Day 30 - 2 Patients remaining at risk 69 38 42 149 - 3 Event Free Rate (%) 84.4 53.0 53.4 64.1 - 4 95% CI (76.7, 92.2) (41.7, 64.4) (42.3, 64.4) (58.0, 70.2) + Time + 1 Survival 0.64 diff --git a/tests/testthat/test-annotate_gg_km.R b/tests/testthat/test-annotate_gg_km.R index a15f0a0dc..6378beed8 100644 --- a/tests/testthat/test-annotate_gg_km.R +++ b/tests/testthat/test-annotate_gg_km.R @@ -24,6 +24,16 @@ p_cow <- cowplot::ggdraw(p_base) # Used to trigger cowplot errors # Cox Data cox_df <- data.frame(Term = "B vs A", HR = "1.2", p = "0.05") +# Surv Times Data (for testing annotate_surv_med) +surv_df <- data.frame( + Strata = c("A", "B"), + Time = c(100, 100), + Survival = c("0.85", "0.75"), + check.names = FALSE +) +# Add rownames to trigger the table's rownames-to-column shifting logic +rownames(surv_df) <- c("Arm A", "Arm B") + # ------------------------------------------------------------------------------ # 2. TESTS FOR annotate_riskdf() # ------------------------------------------------------------------------------ @@ -62,32 +72,29 @@ test_that("annotate_surv_med() handles all branches and inputs", { expect_no_error( annotate_surv_med( p_base, - fit_strat, + surv_tbl = surv_df, table_position = c(x = 0.5, y = 0.5, w = 0.1, h = 0.1) ) ) # Success: Cowplot input - expect_no_error(annotate_surv_med(p_cow, fit_strat)) - - # Success: Unstratified - expect_no_error(annotate_surv_med(p_base, fit_single)) + expect_no_error(annotate_surv_med(p_cow, surv_tbl = surv_df)) # Error: Invalid plot expect_error( - annotate_surv_med(list(), fit_strat), + annotate_surv_med(list(), surv_tbl = surv_df), "must be a ggplot or cowplot object" ) - # Error: Invalid survfit + # Error: Invalid surv_tbl (Now expects a data.frame, not survfit) expect_error( - annotate_surv_med(p_base, list()), - "must be a survfit object" + annotate_surv_med(p_base, surv_tbl = list()), + "must be a data.frame" ) # Fill Logic: Custom color and FALSE - expect_no_error(annotate_surv_med(p_base, fit_strat, fill = "red")) - expect_no_error(annotate_surv_med(p_base, fit_strat, fill = FALSE)) + expect_no_error(annotate_surv_med(p_base, surv_tbl = surv_df, fill = "red")) + expect_no_error(annotate_surv_med(p_base, surv_tbl = surv_df, fill = FALSE)) }) # ------------------------------------------------------------------------------ @@ -103,7 +110,7 @@ test_that("annotate_coxph() handles all branches and inputs", { "must be a ggplot or cowplot object" ) - # Error: Invalid dataframe ( + # Error: Invalid dataframe expect_error(annotate_coxph(p_base, list()), "must be a data.frame") # Fill Logic: Custom color diff --git a/tests/testthat/test-get_surv_times_df.R b/tests/testthat/test-get_surv_times_df.R new file mode 100644 index 000000000..8b2d0529b --- /dev/null +++ b/tests/testthat/test-get_surv_times_df.R @@ -0,0 +1,72 @@ +skip_if_pkg_not_installed(c("survival")) + +test_that("get_surv_times_df() works for stratified and unstratified models", { + # Stratified Model + fit_strat <- survival::survfit( + survival::Surv(AVAL, 1 - CNSR) ~ TRTA, + data = cards::ADTTE + ) + + df_strat <- get_surv_times_df(fit_strat, times = c(30, 60)) + + expect_s3_class(df_strat, "data.frame") + expect_equal( + colnames(df_strat), + c("Strata", "Time", "N at Risk", "Survival", "95% CI") + ) + expect_equal(unique(df_strat$Time), c(30, 60)) + expect_true(length(unique(df_strat$Strata)) > 1) + + # Unstratified Model + fit_unstrat <- survival::survfit( + survival::Surv(AVAL, 1 - CNSR) ~ 1, + data = cards::ADTTE + ) + + df_unstrat <- get_surv_times_df(fit_unstrat, times = c(30, 60)) + + expect_s3_class(df_unstrat, "data.frame") + expect_equal(unique(df_unstrat$Strata), "All") +}) + +test_that("get_surv_times_df() applies scale and conf_int correctly", { + fit <- survival::survfit( + survival::Surv(AVAL, 1 - CNSR) ~ 1, + data = cards::ADTTE + ) + + df_scaled <- get_surv_times_df( + fit, + times = 30, + conf_int = 0.90, + scale = 100 + ) + + # CI column should rename automatically based on conf_int + expect_true("90% CI" %in% colnames(df_scaled)) + expect_false("95% CI" %in% colnames(df_scaled)) + + # Ensure the value string reflects the x100 scaling + # (e.g., "95.00" vs "0.95") + surv_val <- as.numeric(df_scaled$Survival[1]) + expect_true(surv_val > 1) +}) + +test_that("get_surv_times_df() catches invalid inputs", { + expect_error( + get_surv_times_df(cards::ADTTE, times = c(30, 60)), + "`fit_km` must be a survfit object", + fixed = TRUE + ) + + fit <- survival::survfit( + survival::Surv(AVAL, 1 - CNSR) ~ 1, + data = cards::ADTTE + ) + + expect_error( + get_surv_times_df(fit, times = NULL), + "`times` must be a non-empty numeric vector", + fixed = TRUE + ) +}) diff --git a/tests/testthat/test-tbl_survfit_times.R b/tests/testthat/test-tbl_survfit_times.R index 1fae42dd0..0bb284740 100644 --- a/tests/testthat/test-tbl_survfit_times.R +++ b/tests/testthat/test-tbl_survfit_times.R @@ -1,91 +1,92 @@ -skip_if_pkg_not_installed(c("survival", "withr")) +skip_if_pkg_not_installed(c("survival", "withr", "cards", "gtsummary")) -test_that("tbl_survfit_times() works", { +test_that("tbl_survfit_times() works for stratified data", { withr::local_options(list(width = 120)) - # Using the default value of the `y` argument - expect_silent( - tbl <- - tbl_survfit_times( - data = cards::ADTTE, - by = "TRTA", - times = c(30, 60) - ) - ) - expect_snapshot(as.data.frame(tbl)) - # Specifying the `y` argument - expect_silent( - tbl <- - tbl_survfit_times( - data = gtsummary::trial, - by = "trt", - y = "survival::Surv(ttdeath, death)", - times = c(12, 15) - ) + fit <- survival::survfit( + survival::Surv(AVAL, 1 - CNSR) ~ TRTA, + data = cards::ADTTE ) + df <- get_surv_times_df(fit, times = c(30, 60)) + + expect_silent(tbl <- tbl_survfit_times(df)) expect_snapshot(as.data.frame(tbl)) +}) + +test_that("tbl_survfit_times() works for unstratified data", { + withr::local_options(list(width = 120)) - # works for unstratified models - expect_silent( - tbl <- - tbl_survfit_times( - data = cards::ADTTE, - times = c(30, 60), - method.args = list(conf.int = 0.90) - ) + fit <- survival::survfit( + survival::Surv(AVAL, 1 - CNSR) ~ 1, + data = cards::ADTTE ) + # Also testing passing a custom confidence interval representation + df <- get_surv_times_df(fit, times = c(30, 60), conf_int = 0.90) + + expect_silent(tbl <- tbl_survfit_times(df)) expect_snapshot(as.data.frame(tbl)) +}) + +test_that("tbl_survfit_times() adapts to user modifications (dropping columns)", { + withr::local_options(list(width = 120)) - # works with NSE inputs in `method.args()` - expect_equal( - tbl_survfit_times( - data = cards::ADTTE, - times = c(30, 60), - method.args = list(id = SEX) - ) |> - gtsummary::gather_ard() |> - getElement("tbl_survfit_times") |> - dplyr::filter(variable == "time") |> - dplyr::select(-fmt_fun), - survival::survfit(survival::Surv(time = AVAL, event = 1 - CNSR) ~ 1, data = cards::ADTTE, id = SEX) |> - cardx::ard_survival_survfit(times = c(30, 60)) |> - dplyr::filter(variable == "time") |> - dplyr::select(-fmt_fun) + fit <- survival::survfit( + survival::Surv(AVAL, 1 - CNSR) ~ 1, + data = cards::ADTTE ) + df <- get_surv_times_df(fit, times = 30) + + # User dynamically drops the N at Risk column and renames CI + df_custom <- df[, c("Strata", "Time", "Survival")] + + expect_silent(tbl <- tbl_survfit_times(df_custom)) + + # Ensure the resulting gtsummary table doesn't break and only contains Survival + tbl_df <- as.data.frame(tbl) + expect_false(any(grepl("N at Risk", tbl_df[[1]]))) + expect_snapshot(tbl_df) }) -test_that("tbl_survfit_times(by) messaging", { +test_that("tbl_survfit_times() catches invalid inputs", { withr::local_options(list(width = 120)) - expect_snapshot( - error = TRUE, - tbl_survfit_times( - data = cards::ADTTE, - by = everything(), - times = 30 - ) + + # Fails when not a data frame + expect_error( + tbl_survfit_times(list(Time = 30, Survival = 0.5)), + "`surv_df` must be a data.frame", + fixed = TRUE ) - expect_snapshot( - error = TRUE, - tbl_survfit_times( - data = gtsummary::trial |> dplyr::rename(time = trt), - by = "time", - y = "survival::Surv(ttdeath, death)", - times = 30 - ) + fit <- survival::survfit( + survival::Surv(AVAL, 1 - CNSR) ~ 1, + data = cards::ADTTE + ) + df <- get_surv_times_df(fit, times = c(30, 60)) + + # Fails when missing mandatory columns + df_missing_time <- df[, c("Strata", "Survival")] + expect_error( + tbl_survfit_times(df_missing_time), + "`surv_df` must contain at least 'Strata' and 'Time' columns", + fixed = TRUE ) }) -test_that("add_overall.tbl_survfit_times() works", { - withr::local_options(list(width = 180)) - expect_snapshot( - tbl_survfit_times( - data = cards::ADTTE, - by = "TRTA", - times = 30, - label = "Day {time}" - ) |> - add_overall(last = TRUE, col_label = "**All Participants** \nN = {n}") |> - as.data.frame() +test_that("add_overall.tbl_survfit_times() acts as a legacy guard", { + fit <- survival::survfit( + survival::Surv(AVAL, 1 - CNSR) ~ TRTA, + data = cards::ADTTE + ) + df <- get_surv_times_df(fit, times = 30) + tbl <- tbl_survfit_times(df) + + # Check that the class was properly assigned + expect_s3_class(tbl, "tbl_survfit_times") + + # Check that calling add_overall() throws our informative migration error + expect_error( + gtsummary::add_overall(tbl), + "`add_overall()` is defunct for `tbl_survfit_times`", + fixed = TRUE ) }) From bca1372d299ea434a3866c387f30bfaf231b2413 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Wed, 17 Jun 2026 11:54:26 +0200 Subject: [PATCH 02/16] adjust gg_km test --- tests/testthat/test-gg_km.R | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-gg_km.R b/tests/testthat/test-gg_km.R index 4808401aa..dc0d283d8 100644 --- a/tests/testthat/test-gg_km.R +++ b/tests/testthat/test-gg_km.R @@ -15,6 +15,9 @@ model_formula <- rlang::new_formula( ) fit_kmg01 <- survival::survfit(model_formula, anl) +# Extract pre-calculated data frame for annotate_surv_med +surv_df <- get_surv_times_df(fit_kmg01, times = c(100, 200)) + test_that("gg_km() works and handles annotations correctly", { expect_no_error( surv_plot_data <- process_survfit(fit_kmg01) @@ -33,7 +36,7 @@ test_that("gg_km() works and handles annotations correctly", { # 1. Test floating annotations (These can be safely piped together) expect_no_error( plt_floats <- gg_km(surv_plot_data) |> - annotate_surv_med(fit_kmg01) |> + annotate_surv_med(surv_tbl = surv_df) |> annotate_coxph(coxph_tbl) ) @@ -66,7 +69,7 @@ test_that("plotlist attribute is preserved through annotate_* stacking", { expect_s3_class(plist_risk$table, "ggplot") # Single floating annotation: annotate_surv_med -> df2gg_floating - plt_med <- base_plt |> annotate_surv_med(fit_kmg01) + plt_med <- base_plt |> annotate_surv_med(surv_tbl = surv_df) plist_med <- attr(plt_med, "plotlist") expect_true(!is.null(plist_med)) expect_named(plist_med, c("main", "table")) @@ -74,7 +77,7 @@ test_that("plotlist attribute is preserved through annotate_* stacking", { # Stacked floating annotations: surv_med |> coxph plt_stacked <- base_plt |> - annotate_surv_med(fit_kmg01) |> + annotate_surv_med(surv_tbl = surv_df) |> annotate_coxph(coxph_tbl) plist_top <- attr(plt_stacked, "plotlist") From 11496d4455836c464ef4a16c1010a71d821689c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Wed, 17 Jun 2026 10:17:52 +0200 Subject: [PATCH 03/16] Init; asterisks in tbl_survfit_quantiles (#267) **What changes are proposed in this pull request?** * `tbl_survfit_quantiles()` now indicates censored observation in min/max values with `*` (#192) Small change as requested in the issue below: Censored min/max range values are now marked with `*` Closes #192 -------------------------------------------------------------------------------- Pre-review Checklist (if item does not apply, mark is as complete) - [ ] **All** GitHub Action workflows pass with a :white_check_mark: - [ ] PR branch has pulled the most recent updates from master branch: `usethis::pr_merge_main()` - [ ] If a bug was fixed, a unit test was added. - [ ] Code coverage is suitable for any new functions/features (generally, 100% coverage for new code): `devtools::test_coverage()` - [ ] Request a reviewer Reviewer Checklist (if item does not apply, mark is as complete) - [ ] If a bug was fixed, a unit test was added. - [ ] Run `pkgdown::build_site()`. Check the R console for errors, and review the rendered website. - [ ] Code coverage is suitable for any new functions/features: `devtools::test_coverage()` When the branch is ready to be merged: - [ ] Update `NEWS.md` with the changes from this pull request under the heading "`# cards (development version)`". If there is an issue associated with the pull request, reference it in parentheses at the end update (see `NEWS.md` for examples). - [ ] **All** GitHub Action workflows pass with a :white_check_mark: - [ ] Approve Pull Request - [ ] Merge the PR. Please use "Squash and merge" or "Rebase and merge". --- NEWS.md | 2 + R/tbl_survfit_quantiles.R | 113 ++++++++++++++---- man/tbl_survfit_quantiles.Rd | 56 ++++++--- .../testthat/_snaps/tbl_survfit_quantiles.md | 24 +++- tests/testthat/test-tbl_survfit_quantiles.R | 38 +++++- 5 files changed, 183 insertions(+), 50 deletions(-) diff --git a/NEWS.md b/NEWS.md index 80031441d..1c5b24342 100644 --- a/NEWS.md +++ b/NEWS.md @@ -86,6 +86,8 @@ all events (#217) * The default `"log-rank"` test now uses `survival::survdiff()` instead of `coin`, aligning output more closely with SAS and `rtables` +* `tbl_survfit_quantiles()` now indicates censored observation in min/max values with `*` (#192) + # crane 0.3.1 ## New Functions and Functionality diff --git a/R/tbl_survfit_quantiles.R b/R/tbl_survfit_quantiles.R index 7df796695..32bec7657 100644 --- a/R/tbl_survfit_quantiles.R +++ b/R/tbl_survfit_quantiles.R @@ -41,7 +41,7 @@ #' #' Here's the general outline for creating this table directly from ARDs. #' 1. Create an ARD of survival quantiles using `cardx::ard_survival_survfit()`. -#' 2. Construct an ARD of the minimum and maximum survival times using `cards::ard_summary()`. +#' 2. Construct an ARD of the minimum and maximum survival times using `cards::ard_mvsummary()`. #' 3. Combine the ARDs and build summary table with `gtsummary::tbl_ard_summary()`. #' #' ```r @@ -60,48 +60,68 @@ #' variable_level = list(NULL) #' ) #' -#' # get the min/max followup time +#' # get the min/max followup time with censor flags +#' df_time <- cards::ADTTE |> +#' dplyr::mutate(time = AVAL, status = 1 - CNSR) +#' +#' # 1. Create a simple helper to check if an extreme time is censored +#' get_cens_flag <- function(data, val) { +#' is_cens <- all(data$status[data$time == val] == 0, na.rm = TRUE) +#' if (isTRUE(is_cens)) "*" else "" +#' } +#' +#' # 2. Get the min/max followup time with censor flags +#' df_time <- cards::ADTTE |> +#' dplyr::mutate(time = AVAL, status = 1 - CNSR) +#' #' ard_surv_min_max <- -#' cards::ard_summary( -#' data = cards::ADTTE, -#' variables = AVAL, +#' cards::ard_mvsummary( +#' df_time, +#' variables = "time", #' by = "TRTA", -#' statistic = everything() ~ cards::continuous_summary_fns(c("min", "max")) +#' statistic = list( +#' time = list( +#' min = function(x, data, ...) min(data$time, na.rm = TRUE), +#' max = function(x, data, ...) max(data$time, na.rm = TRUE), +#' min_cens = function(x, data, ...) get_cens_flag(data, min(data$time, na.rm = TRUE)), +#' max_cens = function(x, data, ...) get_cens_flag(data, max(data$time, na.rm = TRUE)) +#' ) +#' ) #' ) #' -#' # stack the ARDs and pass them to `tbl_ard_summary()` +#' # 3. Stack the ARDs and format the table #' cards::bind_ard( #' ard_surv_quantiles, #' ard_surv_min_max #' ) |> #' tbl_ard_summary( #' by = "TRTA", -#' type = list(prob = "continuous2", AVAL = "continuous"), +#' type = list(prob = "continuous2", time = "continuous"), #' statistic = list( #' prob = c("{estimate50}", "({conf.low50}, {conf.high50})", "{estimate25}, {estimate75}"), -#' AVAL = "{min} to {max}" +#' time = "{min}{min_cens} to {max}{max_cens}" #' ), #' label = list( #' prob = "Time to event", -#' AVAL = "Range" +#' time = "Range" #' ) #' ) |> -#' # directly modify the labels in the table to match spec +#' # cleanly rename the default ARD statistics #' modify_table_body( #' ~ .x |> #' dplyr::mutate( -#' label = dplyr::case_when( -#' .data$label == "Survival Probability" ~ "Median", -#' .data$label == "(CI Lower Bound, CI Upper Bound)" ~ "95% CI", -#' .data$label == "Survival Probability, Survival Probability" ~ "25% and 75%-ile", -#' .default = .data$label +#' label = dplyr::case_match( +#' label, +#' "Survival Probability" ~ "Median", +#' "(CI Lower Bound, CI Upper Bound)" ~ "95% CI", +#' "Survival Probability, Survival Probability" ~ "25% and 75%-ile", +#' .default = label #' ) #' ) #' ) |> #' # update indentation to match spec #' modify_indent(columns = "label", rows = label == "95% CI", indent = 8L) |> -#' modify_indent(columns = "label", rows = .data$label == "Range", indent = 4L) |> -#' # remove default footnotes +#' modify_indent(columns = "label", rows = label == "Range", indent = 4L) |> #' remove_footnote_header(columns = all_stat_cols()) #' ``` #' @@ -173,19 +193,33 @@ tbl_survfit_quantiles <- function(data, formula = form, data = data ) |> - stats::setNames(c("time", by)) |> - dplyr::mutate(time = .data$time[, 1]) + stats::setNames(c("surv_obj", by)) |> + dplyr::mutate( + time = .data$surv_obj[, 1], + status = .data$surv_obj[, 2] + ) ard_followup_range <- - cards::ard_summary( + cards::ard_mvsummary( df_time, variables = "time", by = any_of(by), - statistic = everything() ~ cards::continuous_summary_fns(c("min", "max")) + statistic = list( + time = list( + min = function(x, data, ...) min(data$time, na.rm = TRUE), + max = function(x, data, ...) max(data$time, na.rm = TRUE), + min_cens = function(x, data, ...) .get_censoring_flag(data, "min"), + max_cens = function(x, data, ...) .get_censoring_flag(data, "max") + ) + ) ) |> cards::update_ard_fmt_fun( stat_names = c("min", "max"), fmt_fun = estimate_fun + ) |> + cards::update_ard_fmt_fun( + stat_names = c("min_cens", "max_cens"), + fmt_fun = as.character ) # calculate ARD for by vars @@ -220,7 +254,7 @@ tbl_survfit_quantiles <- function(data, type = list(prob = "continuous2", time = "continuous"), statistic = list( prob = c("{estimate50}", "({conf.low50}, {conf.high50})", "{estimate25}, {estimate75}"), - time = "{min} to {max}" + time = "{min}{min_cens} to {max}{max_cens}" ), label = list( prob = header, @@ -272,6 +306,39 @@ tbl_survfit_quantiles <- function(data, structure(class = c("tbl_survfit_quantiles", "gtsummary")) } +#' Check for Censoring at Extreme Times +#' +#' @description +#' Helper function to evaluate whether all observations at a given extreme time +#' point (minimum or maximum) are censored. This flag is used to append +#' asterisks to survival quantiles in summary tables. +#' +#' @param data (`data.frame`)\cr +#' A data frame containing the survival data with columns `time` and `status`. +#' @param type (`character`)\cr +#' A string indicating which extreme to evaluate, either `"min"` or `"max"`. +#' +#' @return A character string. Returns `"*"` if all events at the evaluated time +#' are censored, and `""` otherwise. +#' @noRd +.get_censoring_flag <- function(data, type = c("min", "max")) { + type <- match.arg(type) + + # Use dynamic function matching to prevent repeating the subsetting logic + # for both boundaries + fun <- match.fun(type) + val <- fun(data$time, na.rm = TRUE) + + # Evaluate true censoring by ensuring all records tied at the boundary time + # strictly have an event status of 0 + is_cens <- all( + data$status[data$time == val & !is.na(data$time)] == 0, + na.rm = TRUE + ) + + if (isTRUE(is_cens)) "*" else "" +} + #' @export #' @rdname tbl_survfit_quantiles add_overall.tbl_survfit_quantiles <- function(x, diff --git a/man/tbl_survfit_quantiles.Rd b/man/tbl_survfit_quantiles.Rd index 645f28087..9f9bd7677 100644 --- a/man/tbl_survfit_quantiles.Rd +++ b/man/tbl_survfit_quantiles.Rd @@ -80,7 +80,7 @@ it from ARDs. Here's the general outline for creating this table directly from ARDs. \enumerate{ \item Create an ARD of survival quantiles using \code{cardx::ard_survival_survfit()}. -\item Construct an ARD of the minimum and maximum survival times using \code{cards::ard_summary()}. +\item Construct an ARD of the minimum and maximum survival times using \code{cards::ard_mvsummary()}. \item Combine the ARDs and build summary table with \code{gtsummary::tbl_ard_summary()}. } @@ -99,48 +99,68 @@ ard_surv_quantiles <- variable_level = list(NULL) ) -# get the min/max followup time +# get the min/max followup time with censor flags +df_time <- cards::ADTTE |> + dplyr::mutate(time = AVAL, status = 1 - CNSR) + +# 1. Create a simple helper to check if an extreme time is censored +get_cens_flag <- function(data, val) \{ + is_cens <- all(data$status[data$time == val] == 0, na.rm = TRUE) + if (isTRUE(is_cens)) "*" else "" +\} + +# 2. Get the min/max followup time with censor flags +df_time <- cards::ADTTE |> + dplyr::mutate(time = AVAL, status = 1 - CNSR) + ard_surv_min_max <- - cards::ard_summary( - data = cards::ADTTE, - variables = AVAL, + cards::ard_mvsummary( + df_time, + variables = "time", by = "TRTA", - statistic = everything() ~ cards::continuous_summary_fns(c("min", "max")) + statistic = list( + time = list( + min = function(x, data, ...) min(data$time, na.rm = TRUE), + max = function(x, data, ...) max(data$time, na.rm = TRUE), + min_cens = function(x, data, ...) get_cens_flag(data, min(data$time, na.rm = TRUE)), + max_cens = function(x, data, ...) get_cens_flag(data, max(data$time, na.rm = TRUE)) + ) + ) ) -# stack the ARDs and pass them to `tbl_ard_summary()` +# 3. Stack the ARDs and format the table cards::bind_ard( ard_surv_quantiles, ard_surv_min_max ) |> tbl_ard_summary( by = "TRTA", - type = list(prob = "continuous2", AVAL = "continuous"), + type = list(prob = "continuous2", time = "continuous"), statistic = list( prob = c("\{estimate50\}", "(\{conf.low50\}, \{conf.high50\})", "\{estimate25\}, \{estimate75\}"), - AVAL = "\{min\} to \{max\}" + time = "\{min\}\{min_cens\} to \{max\}\{max_cens\}" ), label = list( prob = "Time to event", - AVAL = "Range" + time = "Range" ) ) |> - # directly modify the labels in the table to match spec + # cleanly rename the default ARD statistics modify_table_body( ~ .x |> dplyr::mutate( - label = dplyr::case_when( - .data$label == "Survival Probability" ~ "Median", - .data$label == "(CI Lower Bound, CI Upper Bound)" ~ "95\% CI", - .data$label == "Survival Probability, Survival Probability" ~ "25\% and 75\%-ile", - .default = .data$label + label = dplyr::case_match( + label, + "Survival Probability" ~ "Median", + "(CI Lower Bound, CI Upper Bound)" ~ "95\% CI", + "Survival Probability, Survival Probability" ~ "25\% and 75\%-ile", + .default = label ) ) ) |> # update indentation to match spec modify_indent(columns = "label", rows = label == "95\% CI", indent = 8L) |> - modify_indent(columns = "label", rows = .data$label == "Range", indent = 4L) |> - # remove default footnotes + modify_indent(columns = "label", rows = label == "Range", indent = 4L) |> remove_footnote_header(columns = all_stat_cols()) }\if{html}{\out{}} } diff --git a/tests/testthat/_snaps/tbl_survfit_quantiles.md b/tests/testthat/_snaps/tbl_survfit_quantiles.md index d211a0955..c032c4127 100644 --- a/tests/testthat/_snaps/tbl_survfit_quantiles.md +++ b/tests/testthat/_snaps/tbl_survfit_quantiles.md @@ -8,19 +8,31 @@ 2 Median NE 36.0 33.0 3 95% CI (NE, NE) (24.0, 46.0) (27.0, 48.0) 4 25% and 75%-ile 70.0, NE 14.0, 58.0 19.0, 80.0 - 5 Range 1.0 to 198.0 1.0 to 189.0 1.0 to 190.0 + 5 Range 1.0 to 198.0* 1.0 to 189.0* 1.0 to 190.0* --- Code - as.data.frame(tbl) + as.data.frame(tbl_str) + Output + Drug A \n(N = 98) Drug B \n(N = 102) + 1 Time to event + 2 Median 23.5 21.2 + 3 95% CI (20.9, NE) (18.0, NE) + 4 25% and 75%-ile 17.4, NE 14.5, NE + 5 Range 3.5 to 24.0* 5.3 to 24.0* + +--- + + Code + as.data.frame(tbl_expr) Output Drug A \n(N = 98) Drug B \n(N = 102) 1 Time to event 2 Median 23.5 21.2 3 95% CI (20.9, NE) (18.0, NE) 4 25% and 75%-ile 17.4, NE 14.5, NE - 5 Range 3.5 to 24.0 5.3 to 24.0 + 5 Range 3.5 to 24.0* 5.3 to 24.0* --- @@ -32,7 +44,7 @@ 2 Median 51.0 3 90% CI (46.0, 68.0) 4 25% and 75%-ile 22.0, NE - 5 Range 1.0 to 198.0 + 5 Range 1.0 to 198.0* # tbl_survfit_quantiles(by) messaging @@ -60,7 +72,7 @@ 2 Median NA 36.000 33.000 3 95% CI (NA, NA) (24.000, 46.000) (27.000, 48.000) 4 25% and 75%-ile 70.000, NA 14.000, 58.000 19.000, 80.000 - 5 Range 1.000 to 198.000 1.000 to 189.000 1.000 to 190.000 + 5 Range 1.000 to 198.000* 1.000 to 189.000* 1.000 to 190.000* # add_overall.tbl_survfit_quantiles() works @@ -72,5 +84,5 @@ 2 Median NE 36.0 33.0 51.0 3 95% CI (NE, NE) (24.0, 46.0) (27.0, 48.0) (43.0, 70.0) 4 25% and 75%-ile 70.0, NE 14.0, 58.0 19.0, 80.0 22.0, NE - 5 Range 1.0 to 198.0 1.0 to 189.0 1.0 to 190.0 1.0 to 198.0 + 5 Range 1.0 to 198.0* 1.0 to 189.0* 1.0 to 190.0* 1.0 to 198.0* diff --git a/tests/testthat/test-tbl_survfit_quantiles.R b/tests/testthat/test-tbl_survfit_quantiles.R index 65c42d826..65f6250d2 100644 --- a/tests/testthat/test-tbl_survfit_quantiles.R +++ b/tests/testthat/test-tbl_survfit_quantiles.R @@ -2,6 +2,7 @@ skip_if_pkg_not_installed(c("survival", "withr")) test_that("tbl_survfit_quantiles() works", { withr::local_options(list(width = 120)) + # Using the default value of the `y` argument expect_silent( tbl <- @@ -12,16 +13,27 @@ test_that("tbl_survfit_quantiles() works", { ) expect_snapshot(as.data.frame(tbl)) - # Specifying the `y` argument + # Specifying the `y` argument as a character string expect_silent( - tbl <- + tbl_str <- tbl_survfit_quantiles( data = gtsummary::trial, by = "trt", y = "survival::Surv(ttdeath, death)" ) ) - expect_snapshot(as.data.frame(tbl)) + expect_snapshot(as.data.frame(tbl_str)) + + # Specifying the `y` argument as an unquoted expression + expect_silent( + tbl_expr <- + tbl_survfit_quantiles( + data = gtsummary::trial, + by = "trt", + y = survival::Surv(ttdeath, death) + ) + ) + expect_snapshot(as.data.frame(tbl_expr)) # works for unstratified models expect_silent( @@ -50,6 +62,26 @@ test_that("tbl_survfit_quantiles() works", { ) }) +test_that("tbl_survfit_quantiles() censoring asterisks logic", { + tbl <- tbl_survfit_quantiles( + data = cards::ADTTE, + by = "TRTA" + ) + + # 1. ARD check: Ensure 'min' and 'max' stats are strictly numeric and do not contain '*' + ard_minmax <- tbl$cards$tbl_survfit_quantiles |> + dplyr::filter(stat_name %in% c("min", "max")) + + expect_false(any(grepl("\\*", as.character(unlist(ard_minmax$stat))))) + expect_true(all(vapply(ard_minmax$stat, is.numeric, FUN.VALUE = logical(1)))) + + # 2. Table check: Ensure '*' is printed in the Range row of the final table body + range_row <- tbl$table_body |> dplyr::filter(label == "Range") + + # ADTTE has censored maximums, so we expect at least one '*' to be rendered in the table columns + expect_true(any(grepl("\\*", unlist(range_row)))) +}) + test_that("tbl_survfit_quantiles(by) messaging", { withr::local_options(list(width = 120)) expect_snapshot( From 6b257a360c6aa1b07456040e31657fecbff102e3 Mon Sep 17 00:00:00 2001 From: jszczypinski <79863450+jszczypinski@users.noreply.github.com> Date: Wed, 17 Jun 2026 08:18:45 +0000 Subject: [PATCH 04/16] [skip actions] Bump version to 0.3.3.9000 --- DESCRIPTION | 2 +- NEWS.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ac9cb50fc..dc59fccd6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: crane Title: Supplements the 'gtsummary' Package for Pharmaceutical Reporting -Version: 0.3.3 +Version: 0.3.3.9000 Authors@R: c( person("Daniel D.", "Sjoberg", , "danield.sjoberg@gmail.com", role = "aut", comment = c(ORCID = "0000-0003-0862-2018", note = "Original creator of the package")), diff --git a/NEWS.md b/NEWS.md index 1c5b24342..ecaa2e914 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# crane 0.3.3 +# crane 0.3.3.9000 * Fixed minor typo in the DESCRIPTION file. From 4e819eb1ad54ddf4677ad8712c2a75337cdc8e12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Wed, 17 Jun 2026 12:36:26 +0200 Subject: [PATCH 05/16] Adjust tests for gg_km and tbl_survfit_times --- tests/testthat/test-gg_km.R | 2 +- tests/testthat/test-tbl_survfit_times.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-gg_km.R b/tests/testthat/test-gg_km.R index dc0d283d8..c9e6b6da4 100644 --- a/tests/testthat/test-gg_km.R +++ b/tests/testthat/test-gg_km.R @@ -129,4 +129,4 @@ test_that("df2gg engines (aligned and floating) work correctly", { hline = FALSE ) ) -}) +}) \ No newline at end of file diff --git a/tests/testthat/test-tbl_survfit_times.R b/tests/testthat/test-tbl_survfit_times.R index 0bb284740..414c21a33 100644 --- a/tests/testthat/test-tbl_survfit_times.R +++ b/tests/testthat/test-tbl_survfit_times.R @@ -89,4 +89,4 @@ test_that("add_overall.tbl_survfit_times() acts as a legacy guard", { "`add_overall()` is defunct for `tbl_survfit_times`", fixed = TRUE ) -}) +}) \ No newline at end of file From 382610f198de56c89a845c513b4b822f535a386e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Wed, 17 Jun 2026 13:03:00 +0200 Subject: [PATCH 06/16] Update add_difference_row.R --- R/add_difference_row.R | 94 +++++++++++++-------- tests/testthat/_snaps/add_difference_row.md | 48 +---------- tests/testthat/test-add_difference_row.R | 58 ++++++------- 3 files changed, 93 insertions(+), 107 deletions(-) diff --git a/R/add_difference_row.R b/R/add_difference_row.R index 173ea0a10..8c65ee0a8 100644 --- a/R/add_difference_row.R +++ b/R/add_difference_row.R @@ -3,19 +3,19 @@ #' Adds survival differences between groups as additional rows to tables created by [`tbl_survfit_times()`]. #' #' Difference statistics are calculated using [cardx::ard_survival_survfit_diff()] -#' for all `tbl_survfit_times(times)` variable values, using `survfit` formula: -#' ```r -#' survival::survfit(y ~ by, data = data) -#' ``` -#' where `y`, `by` and `data` are the inputs of the same names to the `tbl_survfit_times()` object `x`. +#' for all specified timepoints, extracting the formula and dataset directly from the provided `fit` object. #' #' Pairwise differences are calculated relative to the specified `by` variable's specified reference level. #' #' @inheritParams gtsummary::add_difference.tbl_summary +#' @param fit (`survfit`)\cr +#' The original `survival::survfit()` model used to extract the survival times. +#' @param times (`numeric`)\cr +#' Numeric vector of times at which to calculate the differences. If `NULL` (default), the function +#' will attempt to infer the times dynamically from the table inputs. #' @param reference (`string`)\cr -#' Value of the `tbl_survfit_times(by)` variable value that is the reference for -#' each of the difference calculations. For factors, use the character -#' level. The reference column will appear as the leftmost column in the table. +#' Value of the `by` variable that is the reference for each of the difference calculations. +#' For factors, use the character level. The reference column will appear as the leftmost column in the table. #' @param pvalue_fun (`function`)\cr #' Function to round and format the `p.value` statistic. Default is [label_roche_pvalue()]. #' The function must have a numeric vector input, and return a string that is the @@ -26,15 +26,15 @@ #' #' @examples #' # Example 2 - Survival Differences ----------- -#' tbl_survfit_times( -#' data = cards::ADTTE, -#' by = "TRTA", -#' times = c(30, 60), -#' label = "Day {time}" -#' ) |> -#' add_difference_row(reference = "Placebo") +#' fit <- survival::survfit(survival::Surv(AVAL, 1 - CNSR) ~ TRTA, data = cards::ADTTE) +#' surv_df <- get_surv_times_df(fit, times = c(30, 60)) +#' +#' tbl_survfit_times(surv_df, label = "Day {time}") |> +#' add_difference_row(fit = fit, reference = "Placebo") add_difference_row.tbl_survfit_times <- function(x, + fit, reference, + times = NULL, statistic = c("{estimate}", "({conf.low}, {conf.high})", "{p.value}"), conf.level = 0.95, pvalue_fun = label_roche_pvalue(), @@ -43,6 +43,7 @@ add_difference_row.tbl_survfit_times <- function(x, # check inputs --------------------------------------------------------------- set_cli_abort_call() check_dots_empty(call = get_cli_abort_call()) + check_class(fit, "survfit") check_not_missing(reference) check_string(reference) check_range(conf.level, range = c(0, 1)) @@ -50,19 +51,51 @@ add_difference_row.tbl_survfit_times <- function(x, check_class(pvalue_fun, "function") check_class(estimate_fun, "function") - # check that input `x` has a `by` var with 2+ levels - if (is_empty(x$inputs$by)) { + # extract formula and data from fit ------------------------------------------ + form <- fit$call$formula |> stats::as.formula() + by <- all.vars(form[[3]]) + + if (length(by) == 0) { + cli::cli_abort( + "Cannot run {.fun add_difference_row} when the {.arg fit} model does not include a strata/by variable.", + call = get_cli_abort_call() + ) + } + by <- by[1] # Take primary stratification variable + + data <- eval(fit$call$data, envir = parent.frame()) + if (is.null(data)) { cli::cli_abort( - "Cannot run {.fun add_difference_row} when {.code tbl_survfit_times()} does not include a {.arg by} argument.", + "Could not extract data from the {.arg fit} object. Ensure the model was called with a data frame in the data argument.", call = get_cli_abort_call() ) } + # Infer times if not explicitly provided ------------------------------------- + if (is.null(times)) { + # Extract the original dataframe passed to tbl_survfit_times + surv_df <- x$inputs$data + if (is.null(surv_df)) surv_df <- x$inputs[[1]] + + if (!is.null(surv_df) && "Time" %in% colnames(surv_df)) { + times <- as.numeric(unique(surv_df$Time)) + times <- times[!is.na(times)] + } + + if (is.null(times) || length(times) == 0) { + cli::cli_abort( + "Could not infer {.arg times} dynamically from the table inputs. Please provide the {.arg times} argument explicitly.", + call = get_cli_abort_call() + ) + } + } + + # extract reference levels from table ---------------------------------------- lst_by_levels <- x$table_styling$header |> dplyr::filter(grepl(pattern = "^stat_\\d*[1-9]\\d*$", x = .data$column)) |> dplyr::select("column", "modify_stat_level") |> - deframe() |> + tibble::deframe() |> lapply(FUN = as.character) # check reference level is appropriate @@ -75,20 +108,15 @@ add_difference_row.tbl_survfit_times <- function(x, # get function inputs -------------------------------------------------------- func_inputs <- as.list(environment()) - by <- x$inputs$by - y <- x$inputs$y - times <- x$inputs$times - data <- x$inputs$data - form <- glue("{y} ~ {cardx::bt(by)}") |> stats::as.formula() # add reference level to the first position in factor - data[[by]] <- fct_relevel(data[[by]], reference, after = 0L) + data[[by]] <- forcats::fct_relevel(data[[by]], reference, after = 0L) ref_col <- names(lst_by_levels)[lst_by_levels == reference] # move reference column to first position in `x` x <- x |> gtsummary::modify_table_body( - ~ .x |> dplyr::relocate(all_of(ref_col), .after = "label") + ~ .x |> dplyr::relocate(dplyr::all_of(ref_col), .after = "label") ) # calculate survival difference ---------------------------------------------- @@ -108,14 +136,14 @@ add_difference_row.tbl_survfit_times <- function(x, tbl_surv_diff <- data |> # create dummy table to add difference rows to - gtsummary::tbl_summary(by = any_of(by), include = as.character(times), missing = "no") |> + gtsummary::tbl_summary(by = dplyr::any_of(by), include = as.character(times), missing = "no") |> gtsummary::add_difference_row( reference = reference, - statistic = everything() ~ statistic, - test = everything() ~ survfit_diff_ard_fun, + statistic = gtsummary::everything() ~ statistic, + test = gtsummary::everything() ~ survfit_diff_ard_fun, conf.level = conf.level, pvalue_fun = pvalue_fun, - estimate_fun = everything() ~ estimate_fun + estimate_fun = gtsummary::everything() ~ estimate_fun ) # build gtsummary table ------------------------------------------------------ @@ -125,14 +153,14 @@ add_difference_row.tbl_survfit_times <- function(x, gtsummary::modify_table_body( ~ .x |> # remove dummy table label rows - dplyr::filter(row_type != "label") |> + dplyr::filter(.data$row_type != "label") |> dplyr::mutate( # match variable names to `x` variable = paste0("time", .data$variable), # add default labels label = dplyr::case_when( .data$label == "Survival Difference" ~ "Difference in Event Free Rates", - .data$label == "(CI Lower Bound, CI Upper Bound)" ~ glue("{style_roche_number(conf.level, scale = 100)}% CI"), + .data$label == "(CI Lower Bound, CI Upper Bound)" ~ glue::glue("{style_roche_number(conf.level, scale = 100)}% CI"), .data$label == "p-value" ~ "p-value (Z-test)", .default = .data$label ) @@ -177,4 +205,4 @@ add_difference_row.tbl_survfit_times <- function(x, x |> structure(class = c("tbl_survfit_times", "gtsummary")) -} +} \ No newline at end of file diff --git a/tests/testthat/_snaps/add_difference_row.md b/tests/testthat/_snaps/add_difference_row.md index 59ff3ebb3..3704602b0 100644 --- a/tests/testthat/_snaps/add_difference_row.md +++ b/tests/testthat/_snaps/add_difference_row.md @@ -1,56 +1,16 @@ -# add_difference_row.tbl_survfit_times() works - - Code - as.data.frame(tbl1) - Output - Placebo \n(N = 86) Xanomeline High Dose \n(N = 84) Xanomeline Low Dose \n(N = 84) - 1 Time 60 - 2 Patients remaining at risk 59 14 20 - 3 Event Free Rate (%) 76.8 24.3 31.1 - 4 95% CI (67.7, 86.0) (13.9, 34.7) (20.2, 41.9) - 5 Difference in Event Free Rates 52.54 45.77 - 6 95% CI (38.65, 66.43) (31.57, 59.97) - 7 p-value (Z-test) <0.0001 <0.0001 - 8 Time 120 - 9 Patients remaining at risk 45 4 8 - 10 Event Free Rate (%) 64.3 9.2 14.7 - 11 95% CI (53.6, 75.1) (1.1, 17.2) (5.7, 23.7) - 12 Difference in Event Free Rates 55.16 49.68 - 13 95% CI (41.76, 68.56) (35.70, 63.65) - 14 p-value (Z-test) <0.0001 <0.0001 - ---- - - Code - as.data.frame(tbl3) - Output - Placebo \n(N = 86) Xanomeline High Dose \n(N = 84) Xanomeline Low Dose \n(N = 84) - 1 Time 60 - 2 Patients remaining at risk 59 14 20 - 3 Event Free Rate (%) 76.8 24.3 31.1 - 4 95% CI (67.7, 86.0) (13.9, 34.7) (20.2, 41.9) - 5 Survival Difference (Survival Difference Standard Error) 52.5 (7.1) 45.8 (7.2) - 6 z statistic (p = p-value) 741.5 (p = <0.001) 631.8 (p = <0.001) - 7 Time 120 - 8 Patients remaining at risk 45 4 8 - 9 Event Free Rate (%) 64.3 9.2 14.7 - 10 95% CI (53.6, 75.1) (1.1, 17.2) (5.7, 23.7) - 11 Survival Difference (Survival Difference Standard Error) 55.2 (6.8) 49.7 (7.1) - 12 z statistic (p = p-value) 806.8 (p = <0.001) 696.7 (p = <0.001) - # add_difference_row.tbl_survfit_times() error messaging works Code - add_difference_row(tbl_survfit_times(data = cards::ADTTE, times = c(30, 60)), "Placebo") + add_difference_row(tbl_survfit_times(df_unstrat), fit = fit_unstrat, reference = "Placebo") Condition Error in `add_difference_row()`: - ! Cannot run `add_difference_row()` when `tbl_survfit_times()` does not include a `by` argument. + ! Cannot run `add_difference_row()` when the `fit` model does not include a strata/by variable. --- Code - add_difference_row(tbl_survfit_times(data = cards::ADTTE, by = TRTA, times = c(30, 60)), "No Treatment") + add_difference_row(tbl_survfit_times(surv_df), fit = fit_strat, reference = "No Treatment") Condition Error in `add_difference_row()`: - ! The `reference` argument must be one of "Placebo", "Xanomeline High Dose", and "Xanomeline Low Dose". + ! Could not infer `times` dynamically from the table inputs. Please provide the `times` argument explicitly. diff --git a/tests/testthat/test-add_difference_row.R b/tests/testthat/test-add_difference_row.R index 8b28e4b4e..54bad22a6 100644 --- a/tests/testthat/test-add_difference_row.R +++ b/tests/testthat/test-add_difference_row.R @@ -1,34 +1,38 @@ -skip_if_pkg_not_installed(c("survival", "withr")) +skip_if_pkg_not_installed(c("survival", "withr", "cards")) -tbl <- - tbl_survfit_times( - data = cards::ADTTE, - by = TRTA, - times = c(60, 120) - ) +# 1. New Decoupled Setup +fit_strat <- survival::survfit( + survival::Surv(AVAL, 1 - CNSR) ~ TRTA, + data = cards::ADTTE +) +surv_df <- get_surv_times_df(fit_strat, times = c(60, 120)) +tbl <- tbl_survfit_times(surv_df) test_that("add_difference_row.tbl_survfit_times() works", { withr::local_options(list(width = 200)) + + # Standard use (times inferred automatically) expect_silent( tbl1 <- tbl |> - add_difference_row(reference = "Placebo") + add_difference_row(fit = fit_strat, reference = "Placebo") ) expect_snapshot(as.data.frame(tbl1)) - # works with different reference column + # Works with different reference column expect_silent( tbl2 <- tbl |> - add_difference_row(reference = "Xanomeline Low Dose") + add_difference_row(fit = fit_strat, reference = "Xanomeline Low Dose") ) expect_equal( as.data.frame(tbl2) |> names(), c("", "Xanomeline Low Dose \n(N = 84)", "Placebo \n(N = 86)", "Xanomeline High Dose \n(N = 84)") ) - # works with custom statistics/formats + # Works with custom statistics/formats expect_silent( tbl3 <- tbl |> add_difference_row( + fit = fit_strat, reference = "Placebo", statistic = c("{estimate} ({std.error})", "{statistic} (p = {p.value})"), pvalue_fun = label_style_pvalue(digits = 3), @@ -37,37 +41,31 @@ test_that("add_difference_row.tbl_survfit_times() works", { ) expect_snapshot(as.data.frame(tbl3)) - # no error if overall column is present + # Works when times are explicitly provided (bypassing inference) expect_silent( tbl4 <- tbl |> - add_overall(last = TRUE) |> - add_difference_row(reference = "Xanomeline High Dose") - ) - expect_equal( - as.data.frame(tbl4) |> names(), - c("", "Xanomeline High Dose \n(N = 84)", "Placebo \n(N = 86)", "Xanomeline Low Dose \n(N = 84)", "All Participants \nN = 254") + add_difference_row(fit = fit_strat, reference = "Placebo", times = c(60, 120)) ) + expect_snapshot(as.data.frame(tbl4)) }) test_that("add_difference_row.tbl_survfit_times() error messaging works", { withr::local_options(list(width = 200)) + # Error 1: Model has no stratification variable + fit_unstrat <- survival::survfit(survival::Surv(AVAL, 1 - CNSR) ~ 1, data = cards::ADTTE) + df_unstrat <- get_surv_times_df(fit_unstrat, times = c(30, 60)) + expect_snapshot( error = TRUE, - tbl_survfit_times( - data = cards::ADTTE, - times = c(30, 60) - ) |> - add_difference_row("Placebo") + tbl_survfit_times(df_unstrat) |> + add_difference_row(fit = fit_unstrat, reference = "Placebo") ) + # Error 2: Reference level does not exist expect_snapshot( error = TRUE, - tbl_survfit_times( - data = cards::ADTTE, - by = TRTA, - times = c(30, 60) - ) |> - add_difference_row("No Treatment") + tbl_survfit_times(surv_df) |> + add_difference_row(fit = fit_strat, reference = "No Treatment") ) -}) +}) \ No newline at end of file From b61080411bb4d685831aa328a098ceba2d0bb7a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Wed, 17 Jun 2026 13:03:19 +0200 Subject: [PATCH 07/16] styler --- R/add_difference_row.R | 12 ++++++------ tests/testthat/test-add_difference_row.R | 6 +++--- tests/testthat/test-gg_km.R | 2 +- tests/testthat/test-tbl_survfit_times.R | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/R/add_difference_row.R b/R/add_difference_row.R index 8c65ee0a8..f0b21192a 100644 --- a/R/add_difference_row.R +++ b/R/add_difference_row.R @@ -14,7 +14,7 @@ #' Numeric vector of times at which to calculate the differences. If `NULL` (default), the function #' will attempt to infer the times dynamically from the table inputs. #' @param reference (`string`)\cr -#' Value of the `by` variable that is the reference for each of the difference calculations. +#' Value of the `by` variable that is the reference for each of the difference calculations. #' For factors, use the character level. The reference column will appear as the leftmost column in the table. #' @param pvalue_fun (`function`)\cr #' Function to round and format the `p.value` statistic. Default is [label_roche_pvalue()]. @@ -54,7 +54,7 @@ add_difference_row.tbl_survfit_times <- function(x, # extract formula and data from fit ------------------------------------------ form <- fit$call$formula |> stats::as.formula() by <- all.vars(form[[3]]) - + if (length(by) == 0) { cli::cli_abort( "Cannot run {.fun add_difference_row} when the {.arg fit} model does not include a strata/by variable.", @@ -75,13 +75,13 @@ add_difference_row.tbl_survfit_times <- function(x, if (is.null(times)) { # Extract the original dataframe passed to tbl_survfit_times surv_df <- x$inputs$data - if (is.null(surv_df)) surv_df <- x$inputs[[1]] - + if (is.null(surv_df)) surv_df <- x$inputs[[1]] + if (!is.null(surv_df) && "Time" %in% colnames(surv_df)) { times <- as.numeric(unique(surv_df$Time)) times <- times[!is.na(times)] } - + if (is.null(times) || length(times) == 0) { cli::cli_abort( "Could not infer {.arg times} dynamically from the table inputs. Please provide the {.arg times} argument explicitly.", @@ -205,4 +205,4 @@ add_difference_row.tbl_survfit_times <- function(x, x |> structure(class = c("tbl_survfit_times", "gtsummary")) -} \ No newline at end of file +} diff --git a/tests/testthat/test-add_difference_row.R b/tests/testthat/test-add_difference_row.R index 54bad22a6..d1699d679 100644 --- a/tests/testthat/test-add_difference_row.R +++ b/tests/testthat/test-add_difference_row.R @@ -10,7 +10,7 @@ tbl <- tbl_survfit_times(surv_df) test_that("add_difference_row.tbl_survfit_times() works", { withr::local_options(list(width = 200)) - + # Standard use (times inferred automatically) expect_silent( tbl1 <- tbl |> @@ -55,7 +55,7 @@ test_that("add_difference_row.tbl_survfit_times() error messaging works", { # Error 1: Model has no stratification variable fit_unstrat <- survival::survfit(survival::Surv(AVAL, 1 - CNSR) ~ 1, data = cards::ADTTE) df_unstrat <- get_surv_times_df(fit_unstrat, times = c(30, 60)) - + expect_snapshot( error = TRUE, tbl_survfit_times(df_unstrat) |> @@ -68,4 +68,4 @@ test_that("add_difference_row.tbl_survfit_times() error messaging works", { tbl_survfit_times(surv_df) |> add_difference_row(fit = fit_strat, reference = "No Treatment") ) -}) \ No newline at end of file +}) diff --git a/tests/testthat/test-gg_km.R b/tests/testthat/test-gg_km.R index c9e6b6da4..dc0d283d8 100644 --- a/tests/testthat/test-gg_km.R +++ b/tests/testthat/test-gg_km.R @@ -129,4 +129,4 @@ test_that("df2gg engines (aligned and floating) work correctly", { hline = FALSE ) ) -}) \ No newline at end of file +}) diff --git a/tests/testthat/test-tbl_survfit_times.R b/tests/testthat/test-tbl_survfit_times.R index 414c21a33..0bb284740 100644 --- a/tests/testthat/test-tbl_survfit_times.R +++ b/tests/testthat/test-tbl_survfit_times.R @@ -89,4 +89,4 @@ test_that("add_overall.tbl_survfit_times() acts as a legacy guard", { "`add_overall()` is defunct for `tbl_survfit_times`", fixed = TRUE ) -}) \ No newline at end of file +}) From 6f4c175bf1ff95bb2918f5aaa807553a5fb15ff6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Wed, 17 Jun 2026 13:49:57 +0200 Subject: [PATCH 08/16] Revert "styler" This reverts commit b61080411bb4d685831aa328a098ceba2d0bb7a8. --- R/add_difference_row.R | 12 ++++++------ tests/testthat/test-add_difference_row.R | 6 +++--- tests/testthat/test-gg_km.R | 2 +- tests/testthat/test-tbl_survfit_times.R | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/R/add_difference_row.R b/R/add_difference_row.R index f0b21192a..8c65ee0a8 100644 --- a/R/add_difference_row.R +++ b/R/add_difference_row.R @@ -14,7 +14,7 @@ #' Numeric vector of times at which to calculate the differences. If `NULL` (default), the function #' will attempt to infer the times dynamically from the table inputs. #' @param reference (`string`)\cr -#' Value of the `by` variable that is the reference for each of the difference calculations. +#' Value of the `by` variable that is the reference for each of the difference calculations. #' For factors, use the character level. The reference column will appear as the leftmost column in the table. #' @param pvalue_fun (`function`)\cr #' Function to round and format the `p.value` statistic. Default is [label_roche_pvalue()]. @@ -54,7 +54,7 @@ add_difference_row.tbl_survfit_times <- function(x, # extract formula and data from fit ------------------------------------------ form <- fit$call$formula |> stats::as.formula() by <- all.vars(form[[3]]) - + if (length(by) == 0) { cli::cli_abort( "Cannot run {.fun add_difference_row} when the {.arg fit} model does not include a strata/by variable.", @@ -75,13 +75,13 @@ add_difference_row.tbl_survfit_times <- function(x, if (is.null(times)) { # Extract the original dataframe passed to tbl_survfit_times surv_df <- x$inputs$data - if (is.null(surv_df)) surv_df <- x$inputs[[1]] - + if (is.null(surv_df)) surv_df <- x$inputs[[1]] + if (!is.null(surv_df) && "Time" %in% colnames(surv_df)) { times <- as.numeric(unique(surv_df$Time)) times <- times[!is.na(times)] } - + if (is.null(times) || length(times) == 0) { cli::cli_abort( "Could not infer {.arg times} dynamically from the table inputs. Please provide the {.arg times} argument explicitly.", @@ -205,4 +205,4 @@ add_difference_row.tbl_survfit_times <- function(x, x |> structure(class = c("tbl_survfit_times", "gtsummary")) -} +} \ No newline at end of file diff --git a/tests/testthat/test-add_difference_row.R b/tests/testthat/test-add_difference_row.R index d1699d679..54bad22a6 100644 --- a/tests/testthat/test-add_difference_row.R +++ b/tests/testthat/test-add_difference_row.R @@ -10,7 +10,7 @@ tbl <- tbl_survfit_times(surv_df) test_that("add_difference_row.tbl_survfit_times() works", { withr::local_options(list(width = 200)) - + # Standard use (times inferred automatically) expect_silent( tbl1 <- tbl |> @@ -55,7 +55,7 @@ test_that("add_difference_row.tbl_survfit_times() error messaging works", { # Error 1: Model has no stratification variable fit_unstrat <- survival::survfit(survival::Surv(AVAL, 1 - CNSR) ~ 1, data = cards::ADTTE) df_unstrat <- get_surv_times_df(fit_unstrat, times = c(30, 60)) - + expect_snapshot( error = TRUE, tbl_survfit_times(df_unstrat) |> @@ -68,4 +68,4 @@ test_that("add_difference_row.tbl_survfit_times() error messaging works", { tbl_survfit_times(surv_df) |> add_difference_row(fit = fit_strat, reference = "No Treatment") ) -}) +}) \ No newline at end of file diff --git a/tests/testthat/test-gg_km.R b/tests/testthat/test-gg_km.R index dc0d283d8..c9e6b6da4 100644 --- a/tests/testthat/test-gg_km.R +++ b/tests/testthat/test-gg_km.R @@ -129,4 +129,4 @@ test_that("df2gg engines (aligned and floating) work correctly", { hline = FALSE ) ) -}) +}) \ No newline at end of file diff --git a/tests/testthat/test-tbl_survfit_times.R b/tests/testthat/test-tbl_survfit_times.R index 0bb284740..414c21a33 100644 --- a/tests/testthat/test-tbl_survfit_times.R +++ b/tests/testthat/test-tbl_survfit_times.R @@ -89,4 +89,4 @@ test_that("add_overall.tbl_survfit_times() acts as a legacy guard", { "`add_overall()` is defunct for `tbl_survfit_times`", fixed = TRUE ) -}) +}) \ No newline at end of file From dfff96f0fd779d5131c4bf80d46e30caaaae7151 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Wed, 17 Jun 2026 13:50:09 +0200 Subject: [PATCH 09/16] Revert "Update add_difference_row.R" This reverts commit 382610f198de56c89a845c513b4b822f535a386e. --- R/add_difference_row.R | 94 ++++++++------------- tests/testthat/_snaps/add_difference_row.md | 48 ++++++++++- tests/testthat/test-add_difference_row.R | 58 +++++++------ 3 files changed, 107 insertions(+), 93 deletions(-) diff --git a/R/add_difference_row.R b/R/add_difference_row.R index 8c65ee0a8..173ea0a10 100644 --- a/R/add_difference_row.R +++ b/R/add_difference_row.R @@ -3,19 +3,19 @@ #' Adds survival differences between groups as additional rows to tables created by [`tbl_survfit_times()`]. #' #' Difference statistics are calculated using [cardx::ard_survival_survfit_diff()] -#' for all specified timepoints, extracting the formula and dataset directly from the provided `fit` object. +#' for all `tbl_survfit_times(times)` variable values, using `survfit` formula: +#' ```r +#' survival::survfit(y ~ by, data = data) +#' ``` +#' where `y`, `by` and `data` are the inputs of the same names to the `tbl_survfit_times()` object `x`. #' #' Pairwise differences are calculated relative to the specified `by` variable's specified reference level. #' #' @inheritParams gtsummary::add_difference.tbl_summary -#' @param fit (`survfit`)\cr -#' The original `survival::survfit()` model used to extract the survival times. -#' @param times (`numeric`)\cr -#' Numeric vector of times at which to calculate the differences. If `NULL` (default), the function -#' will attempt to infer the times dynamically from the table inputs. #' @param reference (`string`)\cr -#' Value of the `by` variable that is the reference for each of the difference calculations. -#' For factors, use the character level. The reference column will appear as the leftmost column in the table. +#' Value of the `tbl_survfit_times(by)` variable value that is the reference for +#' each of the difference calculations. For factors, use the character +#' level. The reference column will appear as the leftmost column in the table. #' @param pvalue_fun (`function`)\cr #' Function to round and format the `p.value` statistic. Default is [label_roche_pvalue()]. #' The function must have a numeric vector input, and return a string that is the @@ -26,15 +26,15 @@ #' #' @examples #' # Example 2 - Survival Differences ----------- -#' fit <- survival::survfit(survival::Surv(AVAL, 1 - CNSR) ~ TRTA, data = cards::ADTTE) -#' surv_df <- get_surv_times_df(fit, times = c(30, 60)) -#' -#' tbl_survfit_times(surv_df, label = "Day {time}") |> -#' add_difference_row(fit = fit, reference = "Placebo") +#' tbl_survfit_times( +#' data = cards::ADTTE, +#' by = "TRTA", +#' times = c(30, 60), +#' label = "Day {time}" +#' ) |> +#' add_difference_row(reference = "Placebo") add_difference_row.tbl_survfit_times <- function(x, - fit, reference, - times = NULL, statistic = c("{estimate}", "({conf.low}, {conf.high})", "{p.value}"), conf.level = 0.95, pvalue_fun = label_roche_pvalue(), @@ -43,7 +43,6 @@ add_difference_row.tbl_survfit_times <- function(x, # check inputs --------------------------------------------------------------- set_cli_abort_call() check_dots_empty(call = get_cli_abort_call()) - check_class(fit, "survfit") check_not_missing(reference) check_string(reference) check_range(conf.level, range = c(0, 1)) @@ -51,51 +50,19 @@ add_difference_row.tbl_survfit_times <- function(x, check_class(pvalue_fun, "function") check_class(estimate_fun, "function") - # extract formula and data from fit ------------------------------------------ - form <- fit$call$formula |> stats::as.formula() - by <- all.vars(form[[3]]) - - if (length(by) == 0) { - cli::cli_abort( - "Cannot run {.fun add_difference_row} when the {.arg fit} model does not include a strata/by variable.", - call = get_cli_abort_call() - ) - } - by <- by[1] # Take primary stratification variable - - data <- eval(fit$call$data, envir = parent.frame()) - if (is.null(data)) { + # check that input `x` has a `by` var with 2+ levels + if (is_empty(x$inputs$by)) { cli::cli_abort( - "Could not extract data from the {.arg fit} object. Ensure the model was called with a data frame in the data argument.", + "Cannot run {.fun add_difference_row} when {.code tbl_survfit_times()} does not include a {.arg by} argument.", call = get_cli_abort_call() ) } - # Infer times if not explicitly provided ------------------------------------- - if (is.null(times)) { - # Extract the original dataframe passed to tbl_survfit_times - surv_df <- x$inputs$data - if (is.null(surv_df)) surv_df <- x$inputs[[1]] - - if (!is.null(surv_df) && "Time" %in% colnames(surv_df)) { - times <- as.numeric(unique(surv_df$Time)) - times <- times[!is.na(times)] - } - - if (is.null(times) || length(times) == 0) { - cli::cli_abort( - "Could not infer {.arg times} dynamically from the table inputs. Please provide the {.arg times} argument explicitly.", - call = get_cli_abort_call() - ) - } - } - - # extract reference levels from table ---------------------------------------- lst_by_levels <- x$table_styling$header |> dplyr::filter(grepl(pattern = "^stat_\\d*[1-9]\\d*$", x = .data$column)) |> dplyr::select("column", "modify_stat_level") |> - tibble::deframe() |> + deframe() |> lapply(FUN = as.character) # check reference level is appropriate @@ -108,15 +75,20 @@ add_difference_row.tbl_survfit_times <- function(x, # get function inputs -------------------------------------------------------- func_inputs <- as.list(environment()) + by <- x$inputs$by + y <- x$inputs$y + times <- x$inputs$times + data <- x$inputs$data + form <- glue("{y} ~ {cardx::bt(by)}") |> stats::as.formula() # add reference level to the first position in factor - data[[by]] <- forcats::fct_relevel(data[[by]], reference, after = 0L) + data[[by]] <- fct_relevel(data[[by]], reference, after = 0L) ref_col <- names(lst_by_levels)[lst_by_levels == reference] # move reference column to first position in `x` x <- x |> gtsummary::modify_table_body( - ~ .x |> dplyr::relocate(dplyr::all_of(ref_col), .after = "label") + ~ .x |> dplyr::relocate(all_of(ref_col), .after = "label") ) # calculate survival difference ---------------------------------------------- @@ -136,14 +108,14 @@ add_difference_row.tbl_survfit_times <- function(x, tbl_surv_diff <- data |> # create dummy table to add difference rows to - gtsummary::tbl_summary(by = dplyr::any_of(by), include = as.character(times), missing = "no") |> + gtsummary::tbl_summary(by = any_of(by), include = as.character(times), missing = "no") |> gtsummary::add_difference_row( reference = reference, - statistic = gtsummary::everything() ~ statistic, - test = gtsummary::everything() ~ survfit_diff_ard_fun, + statistic = everything() ~ statistic, + test = everything() ~ survfit_diff_ard_fun, conf.level = conf.level, pvalue_fun = pvalue_fun, - estimate_fun = gtsummary::everything() ~ estimate_fun + estimate_fun = everything() ~ estimate_fun ) # build gtsummary table ------------------------------------------------------ @@ -153,14 +125,14 @@ add_difference_row.tbl_survfit_times <- function(x, gtsummary::modify_table_body( ~ .x |> # remove dummy table label rows - dplyr::filter(.data$row_type != "label") |> + dplyr::filter(row_type != "label") |> dplyr::mutate( # match variable names to `x` variable = paste0("time", .data$variable), # add default labels label = dplyr::case_when( .data$label == "Survival Difference" ~ "Difference in Event Free Rates", - .data$label == "(CI Lower Bound, CI Upper Bound)" ~ glue::glue("{style_roche_number(conf.level, scale = 100)}% CI"), + .data$label == "(CI Lower Bound, CI Upper Bound)" ~ glue("{style_roche_number(conf.level, scale = 100)}% CI"), .data$label == "p-value" ~ "p-value (Z-test)", .default = .data$label ) @@ -205,4 +177,4 @@ add_difference_row.tbl_survfit_times <- function(x, x |> structure(class = c("tbl_survfit_times", "gtsummary")) -} \ No newline at end of file +} diff --git a/tests/testthat/_snaps/add_difference_row.md b/tests/testthat/_snaps/add_difference_row.md index 3704602b0..59ff3ebb3 100644 --- a/tests/testthat/_snaps/add_difference_row.md +++ b/tests/testthat/_snaps/add_difference_row.md @@ -1,16 +1,56 @@ +# add_difference_row.tbl_survfit_times() works + + Code + as.data.frame(tbl1) + Output + Placebo \n(N = 86) Xanomeline High Dose \n(N = 84) Xanomeline Low Dose \n(N = 84) + 1 Time 60 + 2 Patients remaining at risk 59 14 20 + 3 Event Free Rate (%) 76.8 24.3 31.1 + 4 95% CI (67.7, 86.0) (13.9, 34.7) (20.2, 41.9) + 5 Difference in Event Free Rates 52.54 45.77 + 6 95% CI (38.65, 66.43) (31.57, 59.97) + 7 p-value (Z-test) <0.0001 <0.0001 + 8 Time 120 + 9 Patients remaining at risk 45 4 8 + 10 Event Free Rate (%) 64.3 9.2 14.7 + 11 95% CI (53.6, 75.1) (1.1, 17.2) (5.7, 23.7) + 12 Difference in Event Free Rates 55.16 49.68 + 13 95% CI (41.76, 68.56) (35.70, 63.65) + 14 p-value (Z-test) <0.0001 <0.0001 + +--- + + Code + as.data.frame(tbl3) + Output + Placebo \n(N = 86) Xanomeline High Dose \n(N = 84) Xanomeline Low Dose \n(N = 84) + 1 Time 60 + 2 Patients remaining at risk 59 14 20 + 3 Event Free Rate (%) 76.8 24.3 31.1 + 4 95% CI (67.7, 86.0) (13.9, 34.7) (20.2, 41.9) + 5 Survival Difference (Survival Difference Standard Error) 52.5 (7.1) 45.8 (7.2) + 6 z statistic (p = p-value) 741.5 (p = <0.001) 631.8 (p = <0.001) + 7 Time 120 + 8 Patients remaining at risk 45 4 8 + 9 Event Free Rate (%) 64.3 9.2 14.7 + 10 95% CI (53.6, 75.1) (1.1, 17.2) (5.7, 23.7) + 11 Survival Difference (Survival Difference Standard Error) 55.2 (6.8) 49.7 (7.1) + 12 z statistic (p = p-value) 806.8 (p = <0.001) 696.7 (p = <0.001) + # add_difference_row.tbl_survfit_times() error messaging works Code - add_difference_row(tbl_survfit_times(df_unstrat), fit = fit_unstrat, reference = "Placebo") + add_difference_row(tbl_survfit_times(data = cards::ADTTE, times = c(30, 60)), "Placebo") Condition Error in `add_difference_row()`: - ! Cannot run `add_difference_row()` when the `fit` model does not include a strata/by variable. + ! Cannot run `add_difference_row()` when `tbl_survfit_times()` does not include a `by` argument. --- Code - add_difference_row(tbl_survfit_times(surv_df), fit = fit_strat, reference = "No Treatment") + add_difference_row(tbl_survfit_times(data = cards::ADTTE, by = TRTA, times = c(30, 60)), "No Treatment") Condition Error in `add_difference_row()`: - ! Could not infer `times` dynamically from the table inputs. Please provide the `times` argument explicitly. + ! The `reference` argument must be one of "Placebo", "Xanomeline High Dose", and "Xanomeline Low Dose". diff --git a/tests/testthat/test-add_difference_row.R b/tests/testthat/test-add_difference_row.R index 54bad22a6..8b28e4b4e 100644 --- a/tests/testthat/test-add_difference_row.R +++ b/tests/testthat/test-add_difference_row.R @@ -1,38 +1,34 @@ -skip_if_pkg_not_installed(c("survival", "withr", "cards")) +skip_if_pkg_not_installed(c("survival", "withr")) -# 1. New Decoupled Setup -fit_strat <- survival::survfit( - survival::Surv(AVAL, 1 - CNSR) ~ TRTA, - data = cards::ADTTE -) -surv_df <- get_surv_times_df(fit_strat, times = c(60, 120)) -tbl <- tbl_survfit_times(surv_df) +tbl <- + tbl_survfit_times( + data = cards::ADTTE, + by = TRTA, + times = c(60, 120) + ) test_that("add_difference_row.tbl_survfit_times() works", { withr::local_options(list(width = 200)) - - # Standard use (times inferred automatically) expect_silent( tbl1 <- tbl |> - add_difference_row(fit = fit_strat, reference = "Placebo") + add_difference_row(reference = "Placebo") ) expect_snapshot(as.data.frame(tbl1)) - # Works with different reference column + # works with different reference column expect_silent( tbl2 <- tbl |> - add_difference_row(fit = fit_strat, reference = "Xanomeline Low Dose") + add_difference_row(reference = "Xanomeline Low Dose") ) expect_equal( as.data.frame(tbl2) |> names(), c("", "Xanomeline Low Dose \n(N = 84)", "Placebo \n(N = 86)", "Xanomeline High Dose \n(N = 84)") ) - # Works with custom statistics/formats + # works with custom statistics/formats expect_silent( tbl3 <- tbl |> add_difference_row( - fit = fit_strat, reference = "Placebo", statistic = c("{estimate} ({std.error})", "{statistic} (p = {p.value})"), pvalue_fun = label_style_pvalue(digits = 3), @@ -41,31 +37,37 @@ test_that("add_difference_row.tbl_survfit_times() works", { ) expect_snapshot(as.data.frame(tbl3)) - # Works when times are explicitly provided (bypassing inference) + # no error if overall column is present expect_silent( tbl4 <- tbl |> - add_difference_row(fit = fit_strat, reference = "Placebo", times = c(60, 120)) + add_overall(last = TRUE) |> + add_difference_row(reference = "Xanomeline High Dose") + ) + expect_equal( + as.data.frame(tbl4) |> names(), + c("", "Xanomeline High Dose \n(N = 84)", "Placebo \n(N = 86)", "Xanomeline Low Dose \n(N = 84)", "All Participants \nN = 254") ) - expect_snapshot(as.data.frame(tbl4)) }) test_that("add_difference_row.tbl_survfit_times() error messaging works", { withr::local_options(list(width = 200)) - # Error 1: Model has no stratification variable - fit_unstrat <- survival::survfit(survival::Surv(AVAL, 1 - CNSR) ~ 1, data = cards::ADTTE) - df_unstrat <- get_surv_times_df(fit_unstrat, times = c(30, 60)) - expect_snapshot( error = TRUE, - tbl_survfit_times(df_unstrat) |> - add_difference_row(fit = fit_unstrat, reference = "Placebo") + tbl_survfit_times( + data = cards::ADTTE, + times = c(30, 60) + ) |> + add_difference_row("Placebo") ) - # Error 2: Reference level does not exist expect_snapshot( error = TRUE, - tbl_survfit_times(surv_df) |> - add_difference_row(fit = fit_strat, reference = "No Treatment") + tbl_survfit_times( + data = cards::ADTTE, + by = TRTA, + times = c(30, 60) + ) |> + add_difference_row("No Treatment") ) -}) \ No newline at end of file +}) From 67cd14d84abd5d3ad3b5226fc66ed51b52c56147 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Wed, 17 Jun 2026 14:51:07 +0200 Subject: [PATCH 10/16] Deprecate add_difference_row; add get_surv_diff_df --- R/add_difference_row.R | 197 +++----------------- R/get_surv_diff_df.R | 105 +++++++++++ tests/testthat/_snaps/add_difference_row.md | 56 ------ tests/testthat/test-add_difference_row.R | 92 +++------ tests/testthat/test-get_surv_diff_df.R.R | 86 +++++++++ tests/testthat/test-gg_km.R | 2 +- tests/testthat/test-tbl_survfit_times.R | 2 +- 7 files changed, 239 insertions(+), 301 deletions(-) create mode 100644 R/get_surv_diff_df.R delete mode 100644 tests/testthat/_snaps/add_difference_row.md create mode 100644 tests/testthat/test-get_surv_diff_df.R.R diff --git a/R/add_difference_row.R b/R/add_difference_row.R index 173ea0a10..292c862e9 100644 --- a/R/add_difference_row.R +++ b/R/add_difference_row.R @@ -1,180 +1,33 @@ -#' @describeIn tbl_survfit_times +#' Add Difference Row to tbl_survfit_times (Legacy) #' -#' Adds survival differences between groups as additional rows to tables created by [`tbl_survfit_times()`]. +#' @description +#' This function is maintained as a legacy method to provide a clear migration +#' path for existing scripts. Because the architecture of `tbl_survfit_times()` +#' was decoupled to separate data extraction from table rendering, it is no +#' longer possible to automatically compute survival differences from the +#' rendered table object. Users should transition to `get_surv_diff_df()`. #' -#' Difference statistics are calculated using [cardx::ard_survival_survfit_diff()] -#' for all `tbl_survfit_times(times)` variable values, using `survfit` formula: -#' ```r -#' survival::survfit(y ~ by, data = data) -#' ``` -#' where `y`, `by` and `data` are the inputs of the same names to the `tbl_survfit_times()` object `x`. +#' @param x (`tbl_survfit_times`)\cr +#' A table object generated by `tbl_survfit_times()`. +#' @param ... Additional arguments passed to other methods. #' -#' Pairwise differences are calculated relative to the specified `by` variable's specified reference level. -#' -#' @inheritParams gtsummary::add_difference.tbl_summary -#' @param reference (`string`)\cr -#' Value of the `tbl_survfit_times(by)` variable value that is the reference for -#' each of the difference calculations. For factors, use the character -#' level. The reference column will appear as the leftmost column in the table. -#' @param pvalue_fun (`function`)\cr -#' Function to round and format the `p.value` statistic. Default is [label_roche_pvalue()]. -#' The function must have a numeric vector input, and return a string that is the -#' rounded/formatted p-value (e.g. `pvalue_fun = label_style_pvalue(digits = 3)`). +#' @return Defunct; throws an error with migration instructions directing users +#' to `get_surv_diff_df()`. #' +#' @method add_difference_row tbl_survfit_times #' @export -#' @order 3 -#' -#' @examples -#' # Example 2 - Survival Differences ----------- -#' tbl_survfit_times( -#' data = cards::ADTTE, -#' by = "TRTA", -#' times = c(30, 60), -#' label = "Day {time}" -#' ) |> -#' add_difference_row(reference = "Placebo") -add_difference_row.tbl_survfit_times <- function(x, - reference, - statistic = c("{estimate}", "({conf.low}, {conf.high})", "{p.value}"), - conf.level = 0.95, - pvalue_fun = label_roche_pvalue(), - estimate_fun = label_roche_number(digits = 2, scale = 100), - ...) { - # check inputs --------------------------------------------------------------- - set_cli_abort_call() - check_dots_empty(call = get_cli_abort_call()) - check_not_missing(reference) - check_string(reference) - check_range(conf.level, range = c(0, 1)) - check_class(statistic, "character") - check_class(pvalue_fun, "function") - check_class(estimate_fun, "function") - - # check that input `x` has a `by` var with 2+ levels - if (is_empty(x$inputs$by)) { - cli::cli_abort( - "Cannot run {.fun add_difference_row} when {.code tbl_survfit_times()} does not include a {.arg by} argument.", - call = get_cli_abort_call() - ) - } - - lst_by_levels <- - x$table_styling$header |> - dplyr::filter(grepl(pattern = "^stat_\\d*[1-9]\\d*$", x = .data$column)) |> - dplyr::select("column", "modify_stat_level") |> - deframe() |> - lapply(FUN = as.character) - - # check reference level is appropriate - if (!as.character(reference) %in% unlist(lst_by_levels)) { - cli::cli_abort( - "The {.arg reference} argument must be one of {.val {unlist(lst_by_levels)}}.", - call = get_cli_abort_call() - ) - } - - # get function inputs -------------------------------------------------------- - func_inputs <- as.list(environment()) - by <- x$inputs$by - y <- x$inputs$y - times <- x$inputs$times - data <- x$inputs$data - form <- glue("{y} ~ {cardx::bt(by)}") |> stats::as.formula() - - # add reference level to the first position in factor - data[[by]] <- fct_relevel(data[[by]], reference, after = 0L) - ref_col <- names(lst_by_levels)[lst_by_levels == reference] - - # move reference column to first position in `x` - x <- x |> - gtsummary::modify_table_body( - ~ .x |> dplyr::relocate(all_of(ref_col), .after = "label") +add_difference_row.tbl_survfit_times <- function(x, ...) { + # The table object no longer stores raw data or model formulas. + # We abort with a highly specific migration path directing the user + # to the new get_surv_diff_df() extractor to fit the decoupled design. + rlang::abort( + paste( + "`add_difference_row()` is defunct for `tbl_survfit_times`.", + "Since the architecture was decoupled, the table object no longer", + "contains the raw data required to calculate survival differences.", + "Please use the new `get_surv_diff_df()` function to extract difference", + "statistics, bind those rows to your survival dataframe using", + "`dplyr::bind_rows()`, and then render the final `tbl_survfit_times()`." ) - - # calculate survival difference ---------------------------------------------- - survfit_diff_ard_fun <- function(data, variable, ...) { - cardx::ard_survival_survfit_diff( - x = rlang::inject(survival::survfit(!!form, data = data)), - times = as.numeric(variable), - conf.level = conf.level - ) |> - dplyr::filter(!.data$stat_name %in% c("method", "reference_level")) - } - - # difference to be calculated for each time - for (t in times) data[[as.character(t)]] <- NA - - # create difference rows - tbl_surv_diff <- - data |> - # create dummy table to add difference rows to - gtsummary::tbl_summary(by = any_of(by), include = as.character(times), missing = "no") |> - gtsummary::add_difference_row( - reference = reference, - statistic = everything() ~ statistic, - test = everything() ~ survfit_diff_ard_fun, - conf.level = conf.level, - pvalue_fun = pvalue_fun, - estimate_fun = everything() ~ estimate_fun - ) - - # build gtsummary table ------------------------------------------------------ - tbl_surv_diff <- - tbl_surv_diff |> - # remove time labels - gtsummary::modify_table_body( - ~ .x |> - # remove dummy table label rows - dplyr::filter(row_type != "label") |> - dplyr::mutate( - # match variable names to `x` - variable = paste0("time", .data$variable), - # add default labels - label = dplyr::case_when( - .data$label == "Survival Difference" ~ "Difference in Event Free Rates", - .data$label == "(CI Lower Bound, CI Upper Bound)" ~ glue("{style_roche_number(conf.level, scale = 100)}% CI"), - .data$label == "p-value" ~ "p-value (Z-test)", - .default = .data$label - ) - ) - ) |> - # indent rows - gtsummary::modify_indent(columns = "label", rows = .data$row_type == "difference_row", indent = 8L) |> - gtsummary::modify_indent(columns = "label", rows = .data$label == "Difference in Event Free Rates", indent = 4L) - - # remove ARD for dummy table rows - tbl_surv_diff$cards$tbl_summary <- NULL - - # add difference rows into tbl_survfit_times table - x <- - gtsummary::tbl_stack( - tbls = list(x, tbl_surv_diff), - quiet = TRUE - ) |> - # move survival difference rows under each section for each matching survival time - gtsummary::modify_table_body( - \(x) { - x |> - dplyr::mutate( - variable_f = factor(gsub("-row_difference", "", .data$variable), levels = unique(x$variable)), - idx_row = dplyr::row_number() - ) |> - dplyr::arrange(dplyr::pick("variable_f", "idx_row")) |> - dplyr::select(-"variable_f", -"idx_row") - } - ) - - # add info to table ---------------------------------------------------------- - x$call_list <- list( - "tbl_survfit_times" = x$tbls[[1]]$call_list, - "add_difference_row" = match.call() - ) - x$cards <- lapply(x$tbls, \(x) x$cards) |> unlist(recursive = FALSE) - x$inputs <- list( - "tbl_survfit_times" = x$tbls[[1]]$inputs, - "add_difference_row" = func_inputs ) - - x |> - structure(class = c("tbl_survfit_times", "gtsummary")) } diff --git a/R/get_surv_diff_df.R b/R/get_surv_diff_df.R new file mode 100644 index 000000000..473ce17cb --- /dev/null +++ b/R/get_surv_diff_df.R @@ -0,0 +1,105 @@ +#' Extract Survival Differences at Specific Times +#' +#' @description +#' Calculates survival differences, confidence intervals, and p-values at +#' specified time points using `cardx::ard_survival_survfit_diff()`. The +#' resulting `data.frame` is structurally identical to the output of +#' `get_surv_times_df()`, allowing them to be easily bound together before +#' passing into `tbl_survfit_times()`. +#' +#' @param fit_km (`survfit`)\cr +#' A fitted Kaplan-Meier object of class `survfit`. +#' @param times (`numeric`)\cr +#' A numeric vector of time points at which to evaluate differences. +#' @param reference (`character`)\cr +#' The name of the reference strata level to calculate differences against. +#' @param conf_int (`numeric`)\cr +#' The confidence level to use for the intervals. Defaults to `0.95`. +#' @param scale (`numeric`)\cr +#' A scaling factor for the survival estimates. For example, `100` converts +#' probabilities to percentages. Defaults to `1`. +#' +#' @return A `data.frame` with columns for `Strata`, `Time`, `Survival`, +#' `XX% CI`, and `p-value`. +#' +#' @examples +#' library(survival) +#' surv_data <- lung +#' surv_data$status <- surv_data$status - 1 +#' surv_data$sex <- factor(surv_data$sex, labels = c("Male", "Female")) +#' +#' # 1. Fit the model +#' fit_km <- survfit(Surv(time, status) ~ sex, data = surv_data) +#' +#' # 2. Extract standard survival times +#' df_surv <- get_surv_times_df(fit_km, times = c(100, 200)) +#' +#' # 3. Extract survival differences (Female vs Male) +#' df_diff <- get_surv_diff_df( +#' fit_km, +#' times = c(100, 200), +#' reference = "Male" +#' ) +#' +#' # 4. Combine and render +#' combined_df <- dplyr::bind_rows(df_surv, df_diff) +#' tbl_survfit_times(combined_df) +#' +#' @export +get_surv_diff_df <- function(fit_km, times, reference, conf_int = 0.95, + scale = 1) { + if (!inherits(fit_km, "survfit")) { + rlang::abort("`fit_km` must be a survfit object.") + } + + if (!is.numeric(times) || length(times) == 0) { + rlang::abort("`times` must be a non-empty numeric vector.") + } + + # Calculate differences using cardx's Analysis Results Data (ARD) framework + ard_res <- cardx::ard_survival_survfit_diff( + x = fit_km, + times = times, + conf.level = conf_int + ) + + # Extract the underlying list data into a flat format. + # Note: cardx stores the numeric time points in `variable_level`. + flat_df <- ard_res |> + dplyr::filter(!.data$stat_name %in% c("method", "reference_level")) |> + dplyr::mutate( + time = as.numeric(unlist(.data$variable_level)), + comp_level = as.character(unlist(.data$group1_level)), + value = as.numeric(unlist(.data$stat)) + ) |> + dplyr::select("time", "comp_level", "stat_name", "value") + + # Pivot wider to build standard columns matching get_surv_times_df() + wide_df <- flat_df |> + tidyr::pivot_wider( + names_from = "stat_name", + values_from = "value" + ) + + ci_col_name <- paste0(conf_int * 100, "% CI") + + # Construct the final data.frame, ensuring the strata name clearly denotes + # that this row represents a calculated difference versus the reference. + res_df <- data.frame( + Strata = paste(wide_df$comp_level, "vs", reference, "(Diff)"), + Time = wide_df$time, + `N at Risk` = NA_character_, + Survival = sprintf("%.2f", wide_df$estimate * scale), + CI = paste0( + "(", sprintf("%.2f", wide_df$conf.low * scale), + ", ", sprintf("%.2f", wide_df$conf.high * scale), ")" + ), + `p-value` = sprintf("%.3f", wide_df$p.value), + stringsAsFactors = FALSE, + check.names = FALSE + ) + + names(res_df)[names(res_df) == "CI"] <- ci_col_name + + res_df +} diff --git a/tests/testthat/_snaps/add_difference_row.md b/tests/testthat/_snaps/add_difference_row.md deleted file mode 100644 index 59ff3ebb3..000000000 --- a/tests/testthat/_snaps/add_difference_row.md +++ /dev/null @@ -1,56 +0,0 @@ -# add_difference_row.tbl_survfit_times() works - - Code - as.data.frame(tbl1) - Output - Placebo \n(N = 86) Xanomeline High Dose \n(N = 84) Xanomeline Low Dose \n(N = 84) - 1 Time 60 - 2 Patients remaining at risk 59 14 20 - 3 Event Free Rate (%) 76.8 24.3 31.1 - 4 95% CI (67.7, 86.0) (13.9, 34.7) (20.2, 41.9) - 5 Difference in Event Free Rates 52.54 45.77 - 6 95% CI (38.65, 66.43) (31.57, 59.97) - 7 p-value (Z-test) <0.0001 <0.0001 - 8 Time 120 - 9 Patients remaining at risk 45 4 8 - 10 Event Free Rate (%) 64.3 9.2 14.7 - 11 95% CI (53.6, 75.1) (1.1, 17.2) (5.7, 23.7) - 12 Difference in Event Free Rates 55.16 49.68 - 13 95% CI (41.76, 68.56) (35.70, 63.65) - 14 p-value (Z-test) <0.0001 <0.0001 - ---- - - Code - as.data.frame(tbl3) - Output - Placebo \n(N = 86) Xanomeline High Dose \n(N = 84) Xanomeline Low Dose \n(N = 84) - 1 Time 60 - 2 Patients remaining at risk 59 14 20 - 3 Event Free Rate (%) 76.8 24.3 31.1 - 4 95% CI (67.7, 86.0) (13.9, 34.7) (20.2, 41.9) - 5 Survival Difference (Survival Difference Standard Error) 52.5 (7.1) 45.8 (7.2) - 6 z statistic (p = p-value) 741.5 (p = <0.001) 631.8 (p = <0.001) - 7 Time 120 - 8 Patients remaining at risk 45 4 8 - 9 Event Free Rate (%) 64.3 9.2 14.7 - 10 95% CI (53.6, 75.1) (1.1, 17.2) (5.7, 23.7) - 11 Survival Difference (Survival Difference Standard Error) 55.2 (6.8) 49.7 (7.1) - 12 z statistic (p = p-value) 806.8 (p = <0.001) 696.7 (p = <0.001) - -# add_difference_row.tbl_survfit_times() error messaging works - - Code - add_difference_row(tbl_survfit_times(data = cards::ADTTE, times = c(30, 60)), "Placebo") - Condition - Error in `add_difference_row()`: - ! Cannot run `add_difference_row()` when `tbl_survfit_times()` does not include a `by` argument. - ---- - - Code - add_difference_row(tbl_survfit_times(data = cards::ADTTE, by = TRTA, times = c(30, 60)), "No Treatment") - Condition - Error in `add_difference_row()`: - ! The `reference` argument must be one of "Placebo", "Xanomeline High Dose", and "Xanomeline Low Dose". - diff --git a/tests/testthat/test-add_difference_row.R b/tests/testthat/test-add_difference_row.R index 8b28e4b4e..7008fb326 100644 --- a/tests/testthat/test-add_difference_row.R +++ b/tests/testthat/test-add_difference_row.R @@ -1,73 +1,23 @@ -skip_if_pkg_not_installed(c("survival", "withr")) - -tbl <- - tbl_survfit_times( - data = cards::ADTTE, - by = TRTA, - times = c(60, 120) - ) - -test_that("add_difference_row.tbl_survfit_times() works", { - withr::local_options(list(width = 200)) - expect_silent( - tbl1 <- tbl |> - add_difference_row(reference = "Placebo") - ) - expect_snapshot(as.data.frame(tbl1)) - - # works with different reference column - expect_silent( - tbl2 <- tbl |> - add_difference_row(reference = "Xanomeline Low Dose") - ) - expect_equal( - as.data.frame(tbl2) |> names(), - c("", "Xanomeline Low Dose \n(N = 84)", "Placebo \n(N = 86)", "Xanomeline High Dose \n(N = 84)") - ) - - # works with custom statistics/formats - expect_silent( - tbl3 <- tbl |> - add_difference_row( - reference = "Placebo", - statistic = c("{estimate} ({std.error})", "{statistic} (p = {p.value})"), - pvalue_fun = label_style_pvalue(digits = 3), - estimate_fun = label_roche_number(digits = 1, scale = 100) - ) - ) - expect_snapshot(as.data.frame(tbl3)) - - # no error if overall column is present - expect_silent( - tbl4 <- tbl |> - add_overall(last = TRUE) |> - add_difference_row(reference = "Xanomeline High Dose") - ) - expect_equal( - as.data.frame(tbl4) |> names(), - c("", "Xanomeline High Dose \n(N = 84)", "Placebo \n(N = 86)", "Xanomeline Low Dose \n(N = 84)", "All Participants \nN = 254") - ) -}) - -test_that("add_difference_row.tbl_survfit_times() error messaging works", { - withr::local_options(list(width = 200)) - - expect_snapshot( - error = TRUE, - tbl_survfit_times( - data = cards::ADTTE, - times = c(30, 60) - ) |> - add_difference_row("Placebo") - ) - - expect_snapshot( - error = TRUE, - tbl_survfit_times( - data = cards::ADTTE, - by = TRTA, - times = c(30, 60) - ) |> - add_difference_row("No Treatment") +# tests/testthat/test-add_difference_row.R + +test_that("add_difference_row.tbl_survfit_times is defunct and throws correct error", { + # Why: We need a minimal mock object with the correct class to ensure S3 + # method dispatch correctly routes the call to our specific legacy function. + mock_tbl <- structure(list(), class = c("tbl_survfit_times", "gtsummary")) + + # Why: Since the function's sole purpose is to prevent use and guide migration, + # we test that it successfully aborts and that the error message explicitly + # mentions the new extractor function (get_surv_diff_df). + # Note: Parentheses in the regex must be escaped. + expect_error( + add_difference_row(mock_tbl), + regexp = "Please use the new `get_surv_diff_df\\(\\)` function" + ) + + # Why: Verifying the first part of the error message to ensure the user knows + # exactly which function is defunct. + expect_error( + add_difference_row(mock_tbl), + regexp = "`add_difference_row\\(\\)` is defunct for `tbl_survfit_times`" ) }) diff --git a/tests/testthat/test-get_surv_diff_df.R.R b/tests/testthat/test-get_surv_diff_df.R.R new file mode 100644 index 000000000..a5e092fb3 --- /dev/null +++ b/tests/testthat/test-get_surv_diff_df.R.R @@ -0,0 +1,86 @@ +# tests/testthat/test-get_surv_diff_df.R + +library(survival) + +# Setup: Create a standard survfit object to be used across multiple tests. +# We use the standard 'lung' dataset and convert 'sex' to a factor to ensure +# clear reference level matching. +surv_data <- survival::lung +surv_data$status <- surv_data$status - 1 +surv_data$sex <- factor(surv_data$sex, labels = c("Male", "Female")) +fit_km <- survfit(Surv(time, status) ~ sex, data = surv_data) + +test_that("get_surv_diff_df returns a correctly structured data.frame", { + # Why: Verifying the core functionality and output shape ensures downstream + # combining with get_surv_times_df() will not fail due to column mismatches. + res <- get_surv_diff_df(fit_km, times = c(100, 200), reference = "Male") + + expect_s3_class(res, "data.frame") + expect_equal(nrow(res), 2) + expect_equal( + names(res), + c("Strata", "Time", "N at Risk", "Survival", "95% CI", "p-value") + ) + expect_equal(res$Time, c(100, 200)) + + # Why: Ensures the strata parsing correctly labels the comparison + expect_true(all(res$Strata == "Female vs Male (Diff)")) + + # 'N at Risk' should be strictly missing for difference rows + expect_true(all(is.na(res$`N at Risk`))) +}) + +test_that("get_surv_diff_df correctly handles scale parameter", { + # Why: Users often want to present survival as percentages (scale = 100). + # We test this to ensure the mathematical multiplication is applied to both + # the estimate and the CI bounds before string formatting. + res_unscaled <- get_surv_diff_df( + fit_km, + times = 100, reference = "Male", scale = 1 + ) + res_scaled <- get_surv_diff_df( + fit_km, + times = 100, reference = "Male", scale = 100 + ) + + # Extract numeric values from the formatted strings for comparison + est_unscaled <- as.numeric(res_unscaled$Survival) + est_scaled <- as.numeric(res_scaled$Survival) + + expect_equal(round(est_scaled, 0), est_unscaled * 100) +}) + +test_that("get_surv_diff_df correctly handles custom confidence intervals", { + # Why: Custom confidence levels (e.g., 90% or 99%) should dynamically alter + # both the statistical calculation and the resulting column header. + res_90 <- get_surv_diff_df( + fit_km, + times = 100, reference = "Male", conf_int = 0.90 + ) + + expect_true("90% CI" %in% names(res_90)) + expect_false("95% CI" %in% names(res_90)) +}) + +test_that("get_surv_diff_df triggers aborts on invalid inputs", { + # Why: Rigorous type-checking ensures the function fails fast and informs the + # user exactly what went wrong before attempting to call cardx or dplyr. + + # 1. Invalid fit_km object + expect_error( + get_surv_diff_df(fit_km = "not_a_model", times = 100, reference = "Male"), + "`fit_km` must be a survfit object." + ) + + # 2. Invalid times (non-numeric) + expect_error( + get_surv_diff_df(fit_km = fit_km, times = "100", reference = "Male"), + "`times` must be a non-empty numeric vector." + ) + + # 3. Invalid times (empty vector) + expect_error( + get_surv_diff_df(fit_km = fit_km, times = numeric(0), reference = "Male"), + "`times` must be a non-empty numeric vector." + ) +}) diff --git a/tests/testthat/test-gg_km.R b/tests/testthat/test-gg_km.R index c9e6b6da4..dc0d283d8 100644 --- a/tests/testthat/test-gg_km.R +++ b/tests/testthat/test-gg_km.R @@ -129,4 +129,4 @@ test_that("df2gg engines (aligned and floating) work correctly", { hline = FALSE ) ) -}) \ No newline at end of file +}) diff --git a/tests/testthat/test-tbl_survfit_times.R b/tests/testthat/test-tbl_survfit_times.R index 414c21a33..0bb284740 100644 --- a/tests/testthat/test-tbl_survfit_times.R +++ b/tests/testthat/test-tbl_survfit_times.R @@ -89,4 +89,4 @@ test_that("add_overall.tbl_survfit_times() acts as a legacy guard", { "`add_overall()` is defunct for `tbl_survfit_times`", fixed = TRUE ) -}) \ No newline at end of file +}) From 43716ebb41ab55856bee6103c5d1af7a42eb9c9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Thu, 18 Jun 2026 09:34:49 +0200 Subject: [PATCH 11/16] Docs --- NAMESPACE | 1 + _pkgdown.yml | 1 + man/add_difference_row.tbl_survfit_times.Rd | 25 +++++++++ man/annotate_gg_km.Rd | 19 ++++--- man/get_surv_diff_df.Rd | 60 +++++++++++++++++++++ man/tbl_survfit_times.Rd | 60 +-------------------- 6 files changed, 101 insertions(+), 65 deletions(-) create mode 100644 man/add_difference_row.tbl_survfit_times.Rd create mode 100644 man/get_surv_diff_df.Rd diff --git a/NAMESPACE b/NAMESPACE index 18f42c9e8..175aabdd4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -28,6 +28,7 @@ export(g_lineplot) export(g_lineplot_table) export(get_cox_pairwise_df) export(get_mmrm_results) +export(get_surv_diff_df) export(get_surv_times_df) export(gg_km) export(gg_lineplot) diff --git a/_pkgdown.yml b/_pkgdown.yml index 0a943e94d..4b3ae0fac 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -47,6 +47,7 @@ reference: - annotate_gg_km - get_cox_pairwise_df - get_surv_times_df + - get_surv_diff_df - title: "Forest Plot" contents: diff --git a/man/add_difference_row.tbl_survfit_times.Rd b/man/add_difference_row.tbl_survfit_times.Rd new file mode 100644 index 000000000..4ebb9dfaa --- /dev/null +++ b/man/add_difference_row.tbl_survfit_times.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/add_difference_row.R +\name{add_difference_row.tbl_survfit_times} +\alias{add_difference_row.tbl_survfit_times} +\title{Add Difference Row to tbl_survfit_times (Legacy)} +\usage{ +\method{add_difference_row}{tbl_survfit_times}(x, ...) +} +\arguments{ +\item{x}{(\code{tbl_survfit_times})\cr +A table object generated by \code{tbl_survfit_times()}.} + +\item{...}{Additional arguments passed to other methods.} +} +\value{ +Defunct; throws an error with migration instructions directing users +to \code{get_surv_diff_df()}. +} +\description{ +This function is maintained as a legacy method to provide a clear migration +path for existing scripts. Because the architecture of \code{tbl_survfit_times()} +was decoupled to separate data extraction from table rendering, it is no +longer possible to automatically compute survival differences from the +rendered table object. Users should transition to \code{get_surv_diff_df()}. +} diff --git a/man/annotate_gg_km.Rd b/man/annotate_gg_km.Rd index a7bd7b47c..1ad510882 100644 --- a/man/annotate_gg_km.Rd +++ b/man/annotate_gg_km.Rd @@ -18,7 +18,7 @@ annotate_riskdf( annotate_surv_med( gg_plt, - fit_km, + surv_tbl, table_position = c(x = 0.8, y = 0.85, w = 0.32, h = 0.16), ... ) @@ -38,8 +38,8 @@ require a pure \code{ggplot2} object.} \item{fit_km}{(\code{survfit})\cr A fitted Kaplan-Meier object of class \code{survfit} (from the \code{survival} -package). This object contains the necessary survival data used to -calculate and generate the content displayed in the annotation table.} +package). This object contains the necessary survival data used by +\code{annotate_riskdf()} to calculate and generate the content displayed.} \item{title}{(\code{character})\cr A single string value indicating whether to include a title above the @@ -65,6 +65,10 @@ have a background fill. Default is \code{TRUE}. inside the annotation box. Default is \code{10}. }} +\item{surv_tbl}{(\code{data.frame})\cr +A data frame containing the pre-calculated survival summary results, +such as the output from \code{get_surv_times_df()}.} + \item{table_position}{(\code{numeric})\cr A named numeric vector \code{c(x, y, w, h)} defining the position and size of the floating table. \code{x} and \code{y} are the coordinates (0 to 1), @@ -80,7 +84,7 @@ The function \code{annotate_riskdf} returns a \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table. The function \code{annotate_surv_med} returns a \code{cowplot} object\cr -with the median survival table annotation added. +with the survival table annotation added. The function \code{annotate_coxph} returns a \code{cowplot} object\cr with the Cox-PH table annotation added. @@ -100,7 +104,7 @@ object (not a combined \code{cowplot} object) because it requires exact X-axis extraction. \item \code{annotate_surv_med()}: The \code{annotate_surv_med} function adds a -median survival time summary table as an annotation box. +survival summary table as an annotation box. \item \code{annotate_coxph()}: The function \code{annotate_coxph()} adds a Cox Proportional Hazards summary table as an annotation box. @@ -130,8 +134,9 @@ use_lung2$arm <- factor(use_lung2$arm, levels = c("C", "B", "A")) fit_kmg01 <- survival::survfit(formula, use_lung2) annotate_riskdf(plt_kmg01, fit_kmg01) # rerun gg_km to change legend order -# Annotate Kaplan-Meier Plot with Median Survival Table -annotate_surv_med(plt_kmg01, fit_kmg01) +# Annotate Kaplan-Meier Plot with Survival Times Table +surv_df <- get_surv_times_df(fit_kmg01, times = c(100, 200)) +annotate_surv_med(plt_kmg01, surv_tbl = surv_df) # Annotate Kaplan-Meier Plot with Cox-PH Table coxph_tbl <- get_cox_pairwise_df( diff --git a/man/get_surv_diff_df.Rd b/man/get_surv_diff_df.Rd new file mode 100644 index 000000000..97e1fba80 --- /dev/null +++ b/man/get_surv_diff_df.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_surv_diff_df.R +\name{get_surv_diff_df} +\alias{get_surv_diff_df} +\title{Extract Survival Differences at Specific Times} +\usage{ +get_surv_diff_df(fit_km, times, reference, conf_int = 0.95, scale = 1) +} +\arguments{ +\item{fit_km}{(\code{survfit})\cr +A fitted Kaplan-Meier object of class \code{survfit}.} + +\item{times}{(\code{numeric})\cr +A numeric vector of time points at which to evaluate differences.} + +\item{reference}{(\code{character})\cr +The name of the reference strata level to calculate differences against.} + +\item{conf_int}{(\code{numeric})\cr +The confidence level to use for the intervals. Defaults to \code{0.95}.} + +\item{scale}{(\code{numeric})\cr +A scaling factor for the survival estimates. For example, \code{100} converts +probabilities to percentages. Defaults to \code{1}.} +} +\value{ +A \code{data.frame} with columns for \code{Strata}, \code{Time}, \code{Survival}, +\verb{XX\% CI}, and \code{p-value}. +} +\description{ +Calculates survival differences, confidence intervals, and p-values at +specified time points using \code{cardx::ard_survival_survfit_diff()}. The +resulting \code{data.frame} is structurally identical to the output of +\code{get_surv_times_df()}, allowing them to be easily bound together before +passing into \code{tbl_survfit_times()}. +} +\examples{ +library(survival) +surv_data <- lung +surv_data$status <- surv_data$status - 1 +surv_data$sex <- factor(surv_data$sex, labels = c("Male", "Female")) + +# 1. Fit the model +fit_km <- survfit(Surv(time, status) ~ sex, data = surv_data) + +# 2. Extract standard survival times +df_surv <- get_surv_times_df(fit_km, times = c(100, 200)) + +# 3. Extract survival differences (Female vs Male) +df_diff <- get_surv_diff_df( + fit_km, + times = c(100, 200), + reference = "Male" +) + +# 4. Combine and render +combined_df <- dplyr::bind_rows(df_surv, df_diff) +tbl_survfit_times(combined_df) + +} diff --git a/man/tbl_survfit_times.Rd b/man/tbl_survfit_times.Rd index 604aa7fd2..f29094fdc 100644 --- a/man/tbl_survfit_times.Rd +++ b/man/tbl_survfit_times.Rd @@ -1,45 +1,12 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_difference_row.R, R/tbl_survfit_times.R -\name{add_difference_row.tbl_survfit_times} -\alias{add_difference_row.tbl_survfit_times} +% Please edit documentation in R/tbl_survfit_times.R +\name{tbl_survfit_times} \alias{tbl_survfit_times} \title{Create a gtsummary Table of Survival Times} \usage{ -\method{add_difference_row}{tbl_survfit_times}( - x, - reference, - statistic = c("{estimate}", "({conf.low}, {conf.high})", "{p.value}"), - conf.level = 0.95, - pvalue_fun = label_roche_pvalue(), - estimate_fun = label_roche_number(digits = 2, scale = 100), - ... -) - tbl_survfit_times(surv_df) } \arguments{ -\item{x}{(\code{tbl_summary})\cr -table created with \code{tbl_summary()}} - -\item{reference}{(\code{string})\cr -Value of the \code{tbl_survfit_times(by)} variable value that is the reference for -each of the difference calculations. For factors, use the character -level. The reference column will appear as the leftmost column in the table.} - -\item{conf.level}{(\code{numeric})\cr -a scalar in the interval \verb{(0, 1)} indicating the confidence level. Default is 0.95} - -\item{pvalue_fun}{(\code{function})\cr -Function to round and format the \code{p.value} statistic. Default is \code{\link[=label_roche_pvalue]{label_roche_pvalue()}}. -The function must have a numeric vector input, and return a string that is the -rounded/formatted p-value (e.g. \code{pvalue_fun = label_style_pvalue(digits = 3)}).} - -\item{estimate_fun}{(\code{\link[gtsummary:syntax]{formula-list-selector}})\cr -List of formulas specifying the functions -to round and format differences and confidence limits.} - -\item{...}{These dots are for future extensions and must be empty.} - \item{surv_df}{(\code{data.frame})\cr The results \code{data.frame} generated by \code{get_surv_times_df()}.} } @@ -51,30 +18,7 @@ Generates a \code{gtsummary} table from the survival times \code{data.frame} cre \code{get_surv_times_df()}. The table dynamically adapts to whichever columns the user leaves in the data frame, allowing for easy customization. } -\section{Methods (by generic)}{ -\itemize{ -\item \code{add_difference_row(tbl_survfit_times)}: Adds survival differences between groups as additional rows to tables created by \code{\link[=tbl_survfit_times]{tbl_survfit_times()}}. - -Difference statistics are calculated using \code{\link[cardx:ard_survival_survfit_diff]{cardx::ard_survival_survfit_diff()}} -for all \code{tbl_survfit_times(times)} variable values, using \code{survfit} formula: - -\if{html}{\out{
}}\preformatted{survival::survfit(y ~ by, data = data) -}\if{html}{\out{
}} - -where \code{y}, \code{by} and \code{data} are the inputs of the same names to the \code{tbl_survfit_times()} object \code{x}. - -Pairwise differences are calculated relative to the specified \code{by} variable's specified reference level. - -}} \examples{ -# Example 2 - Survival Differences ----------- -tbl_survfit_times( - data = cards::ADTTE, - by = "TRTA", - times = c(30, 60), - label = "Day {time}" -) |> - add_difference_row(reference = "Placebo") library(survival) surv_data <- lung surv_data$status <- surv_data$status - 1 From 2bee9141e3d318104f9ad65a8347a9e72f11a75b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Thu, 18 Jun 2026 09:47:17 +0200 Subject: [PATCH 12/16] More adjustments --- R/get_surv_diff_df.R | 2 +- R/tbl_survfit_times.R | 9 +++++++ tests/testthat/test-tbl_survfit_times.R | 35 +++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/R/get_surv_diff_df.R b/R/get_surv_diff_df.R index 473ce17cb..59b43c9c3 100644 --- a/R/get_surv_diff_df.R +++ b/R/get_surv_diff_df.R @@ -88,7 +88,7 @@ get_surv_diff_df <- function(fit_km, times, reference, conf_int = 0.95, res_df <- data.frame( Strata = paste(wide_df$comp_level, "vs", reference, "(Diff)"), Time = wide_df$time, - `N at Risk` = NA_character_, + `N at Risk` = NA_real_, Survival = sprintf("%.2f", wide_df$estimate * scale), CI = paste0( "(", sprintf("%.2f", wide_df$conf.low * scale), diff --git a/R/tbl_survfit_times.R b/R/tbl_survfit_times.R index 8d732d65c..0d5bb2538 100644 --- a/R/tbl_survfit_times.R +++ b/R/tbl_survfit_times.R @@ -88,15 +88,23 @@ tbl_survfit_times <- function(surv_df) { vars_include <- c(vars_include, "N at Risk") label_list <- c(label_list, list(`N at Risk` ~ "N at Risk")) } + if ("Survival" %in% names(data_subset)) { vars_include <- c(vars_include, "Survival") label_list <- c(label_list, list(Survival ~ "Survival")) } + if (!is.null(ci_col) && ci_col %in% names(data_subset)) { vars_include <- c(vars_include, ci_col) label_list <- c(label_list, stats::setNames(list(ci_col), ci_col)) } + # Capture the p-value column generated by difference tables + if ("p-value" %in% names(data_subset)) { + vars_include <- c(vars_include, "p-value") + label_list <- c(label_list, list(`p-value` ~ "p-value")) + } + res <- data_subset |> cards::ard_mvsummary( variables = dplyr::all_of(vars_include), @@ -113,6 +121,7 @@ tbl_survfit_times <- function(surv_df) { gtsummary::modify_header(gtsummary::all_stat_cols() ~ " ") |> gtsummary::modify_footnote(gtsummary::everything() ~ NA) + # Indent the confidence interval row for better readability if (!is.null(ci_col)) { res <- res |> gtsummary::modify_indent( diff --git a/tests/testthat/test-tbl_survfit_times.R b/tests/testthat/test-tbl_survfit_times.R index 0bb284740..2ca6a25f3 100644 --- a/tests/testthat/test-tbl_survfit_times.R +++ b/tests/testthat/test-tbl_survfit_times.R @@ -90,3 +90,38 @@ test_that("add_overall.tbl_survfit_times() acts as a legacy guard", { fixed = TRUE ) }) + +test_that("tbl_survfit_times correctly renders the p-value row from combined data", { + # 1. Setup data and fit the model + surv_data <- survival::lung + surv_data$status <- surv_data$status - 1 + surv_data$sex <- factor(surv_data$sex, labels = c("Male", "Female")) + + fit_km <- survival::survfit(survival::Surv(time, status) ~ sex, data = surv_data) + + # 2. Extract standard stats and differences + df_surv <- get_surv_times_df(fit_km, times = 100) + df_diff <- get_surv_diff_df(fit_km, times = 100, reference = "Male") + + # 3. Combine and render + combined_df <- dplyr::bind_rows(df_surv, df_diff) + res_tbl <- tbl_survfit_times(combined_df) + + # 4. Extract the underlying table body from the gtsummary object + tbl_body <- res_tbl$table_body + + # 5. Assertions + # The `.get_single_time_table` maps the column name to the `variable` + # or `label` column inside gtsummary's internal structure. + expect_true( + "p-value" %in% tbl_body$variable || "p-value" %in% tbl_body$label, + info = "The 'p-value' row was dropped and did not render in the final gtsummary table." + ) + + # Optionally, extract the specific row to ensure a value actually populated + p_val_row <- tbl_body[tbl_body$variable == "p-value" | tbl_body$label == "p-value", ] + expect_false( + all(is.na(p_val_row)), + info = "The p-value row was created, but all values inside it are NA." + ) +}) From 3c62cca77250d9ccf6cf6df2439f7707f779d22f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Thu, 18 Jun 2026 12:00:47 +0200 Subject: [PATCH 13/16] Some more adjustments + deprecation --- NAMESPACE | 2 - R/add_difference_row.R | 33 ----- R/annotate_gg_km.R | 126 ++++---------------- R/deprecated.R | 34 ++++++ R/get_surv_times_df.R | 7 +- R/tbl_survfit_times.R | 29 ----- man/add_difference_row.tbl_survfit_times.Rd | 25 ---- man/add_overall.tbl_survfit_times.Rd | 24 ---- man/annotate_gg_km.Rd | 50 +------- man/deprecated.Rd | 6 + tests/testthat/test-add_difference_row.R | 23 ---- 11 files changed, 65 insertions(+), 294 deletions(-) delete mode 100644 R/add_difference_row.R delete mode 100644 man/add_difference_row.tbl_survfit_times.Rd delete mode 100644 man/add_overall.tbl_survfit_times.Rd delete mode 100644 tests/testthat/test-add_difference_row.R diff --git a/NAMESPACE b/NAMESPACE index 175aabdd4..02a847b79 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,11 +16,9 @@ export(add_grade_column) export(add_hierarchical_count_row) export(add_overall) export(adjust_stat_columns_wrap) -export(annotate_coxph) export(annotate_lineplot_df) export(annotate_pkc_df) export(annotate_riskdf) -export(annotate_surv_med) export(ard_tabulate_abnormal_by_baseline) export(df_add_poolings) export(filter_hierarchical) diff --git a/R/add_difference_row.R b/R/add_difference_row.R deleted file mode 100644 index 292c862e9..000000000 --- a/R/add_difference_row.R +++ /dev/null @@ -1,33 +0,0 @@ -#' Add Difference Row to tbl_survfit_times (Legacy) -#' -#' @description -#' This function is maintained as a legacy method to provide a clear migration -#' path for existing scripts. Because the architecture of `tbl_survfit_times()` -#' was decoupled to separate data extraction from table rendering, it is no -#' longer possible to automatically compute survival differences from the -#' rendered table object. Users should transition to `get_surv_diff_df()`. -#' -#' @param x (`tbl_survfit_times`)\cr -#' A table object generated by `tbl_survfit_times()`. -#' @param ... Additional arguments passed to other methods. -#' -#' @return Defunct; throws an error with migration instructions directing users -#' to `get_surv_diff_df()`. -#' -#' @method add_difference_row tbl_survfit_times -#' @export -add_difference_row.tbl_survfit_times <- function(x, ...) { - # The table object no longer stores raw data or model formulas. - # We abort with a highly specific migration path directing the user - # to the new get_surv_diff_df() extractor to fit the decoupled design. - rlang::abort( - paste( - "`add_difference_row()` is defunct for `tbl_survfit_times`.", - "Since the architecture was decoupled, the table object no longer", - "contains the raw data required to calculate survival differences.", - "Please use the new `get_surv_diff_df()` function to extract difference", - "statistics, bind those rows to your survival dataframe using", - "`dplyr::bind_rows()`, and then render the final `tbl_survfit_times()`." - ) - ) -} diff --git a/R/annotate_gg_km.R b/R/annotate_gg_km.R index 5144998a0..f9e274361 100644 --- a/R/annotate_gg_km.R +++ b/R/annotate_gg_km.R @@ -183,125 +183,41 @@ annotate_riskdf <- function(gg_plt, res } -#' @describeIn annotate_gg_km The `annotate_surv_med` function adds a -#' survival summary table as an annotation box. -#' -#' @return The function `annotate_surv_med` returns a `cowplot` object\cr -#' with the survival table annotation added. -#' -#' @examples -#' # Annotate Kaplan-Meier Plot with Survival Times Table -#' surv_df <- get_surv_times_df(fit_kmg01, times = c(100, 200)) -#' annotate_surv_med(plt_kmg01, surv_tbl = surv_df) -#' -#' @export -annotate_surv_med <- function(gg_plt, - surv_tbl, - table_position = c( - x = 0.8, - y = 0.85, - w = 0.32, - h = 0.16 - ), +annotate_surv_med <- function(gg_plt, surv_tbl, + table_position = c(x = 0.8, y = 0.85, w = 0.32, h = 0.16), ...) { - default_eargs <- list( - font_size = 10, - fill = TRUE - ) - - eargs <- list(...) - eargs <- utils::modifyList(default_eargs, eargs) - - if (!inherits(gg_plt, c("gg", "ggplot", "cowplot"))) { - rlang::abort("`gg_plt` must be a ggplot or cowplot object.") - } - - if (!inherits(surv_tbl, "data.frame")) { - rlang::abort("`surv_tbl` must be a data.frame.") - } - - if (!identical(rownames(surv_tbl), as.character(seq_len(nrow(surv_tbl))))) { - surv_tbl <- data.frame( - " " = rownames(surv_tbl), - surv_tbl, - check.names = FALSE - ) - } - - bg_fill <- if (isTRUE(eargs[["fill"]])) "#00000020" else eargs[["fill"]] - - # Call the floating table engine - res <- df2gg_floating( - df = surv_tbl, - gg_plt = gg_plt, - x = table_position["x"], - y = table_position["y"], - w = table_position["w"], - h = table_position["h"], - font_size = eargs[["font_size"]], - colwidths = NULL, - bg_fill = bg_fill - ) - - res + eargs <- utils::modifyList(list(font_size = 10, fill = TRUE), list(...)) + .add_floating_annotation(gg_plt, surv_tbl, table_position, eargs) } -#' @describeIn annotate_gg_km The function `annotate_coxph()` adds a Cox -#' Proportional Hazards summary table as an annotation box. -#' -#' @return The function `annotate_coxph` returns a `cowplot` object\cr -#' with the Cox-PH table annotation added. -#' -#' @examples -#' # Annotate Kaplan-Meier Plot with Cox-PH Table -#' coxph_tbl <- get_cox_pairwise_df( -#' formula, -#' data = use_lung, arm = "arm", ref_group = "A" -#' ) -#' result <- annotate_coxph(plt_kmg01, coxph_tbl) -#' -#' # Extract original plots from any annotated result -#' attr(result, "plotlist")$main -#' -#' @export -annotate_coxph <- function(gg_plt, - coxph_tbl, - table_position = c( - x = 0.29, - y = 0.51, - w = 0.4, - h = 0.125 - ), +annotate_coxph <- function(gg_plt, coxph_tbl, + table_position = c(x = 0.29, y = 0.51, w = 0.4, h = 0.125), ...) { - default_eargs <- list( - fill = TRUE, - font_size = 10 - ) - - eargs <- list(...) - eargs <- utils::modifyList(default_eargs, eargs) + eargs <- utils::modifyList(list(font_size = 10, fill = TRUE), list(...)) + .add_floating_annotation(gg_plt, coxph_tbl, table_position, eargs) +} +#' Internal helper for floating table annotations +#' @keywords internal +#' @noRd +.add_floating_annotation <- function(gg_plt, tbl, table_position, eargs) { if (!inherits(gg_plt, c("gg", "ggplot", "cowplot"))) { rlang::abort("`gg_plt` must be a ggplot or cowplot object.") } - if (!inherits(coxph_tbl, "data.frame")) { - rlang::abort("`coxph_tbl` must be a data.frame.") + if (!inherits(tbl, "data.frame")) { + rlang::abort("Input table must be a data.frame.") } - if (!identical(rownames(coxph_tbl), as.character(seq_len(nrow(coxph_tbl))))) { - coxph_tbl <- data.frame( - " " = rownames(coxph_tbl), - coxph_tbl, - check.names = FALSE - ) + # Abstracted rowname check + if (!identical(rownames(tbl), as.character(seq_len(nrow(tbl))))) { + tbl <- data.frame(" " = rownames(tbl), tbl, check.names = FALSE) } bg_fill <- if (isTRUE(eargs[["fill"]])) "#00000020" else eargs[["fill"]] - # Call the floating table engine - res <- df2gg_floating( - df = coxph_tbl, + df2gg_floating( + df = tbl, gg_plt = gg_plt, x = table_position["x"], y = table_position["y"], @@ -311,6 +227,4 @@ annotate_coxph <- function(gg_plt, colwidths = NULL, bg_fill = bg_fill ) - - res } diff --git a/R/deprecated.R b/R/deprecated.R index 3e73db427..ed756b7be 100644 --- a/R/deprecated.R +++ b/R/deprecated.R @@ -53,3 +53,37 @@ preprocess_lineplot_data <- function(...) { "crane::annotate_gg()" ) } + +#' @rdname deprecated +#' @export +add_overall.tbl_survfit_times <- function(x, ...) { + lifecycle::deprecate_stop( + when = "0.3.4", # Replace with the package version where this became defunct + what = "add_overall()", + details = paste( + "Since we decoupled the architecture, a user wanting an overall column", + "alongside stratified columns should simply fit an unstratified model,", + "extract the dataframe, and use standard `dplyr::bind_rows()` with", + "their stratified dataframe before pushing the combined output into", + "`tbl_survfit_times()`." + ) + ) +} + +#' @rdname deprecated +#' @export +add_difference_row.tbl_survfit_times <- function(x, ...) { + lifecycle::deprecate_stop( + when = "0.3.4", # Replace with the package version where this became defunct + what = "add_difference_row()", + with = "get_surv_diff_df()", + details = paste( + "`add_difference_row()` is defunct for `tbl_survfit_times`.", + "Since the architecture was decoupled, the table object no longer", + "contains the raw data required to calculate survival differences.", + "Please use the new `get_surv_diff_df()` function to extract difference", + "statistics, bind those rows to your survival dataframe using", + "`dplyr::bind_rows()`, and then render the final `tbl_survfit_times()`." + ) + ) +} diff --git a/R/get_surv_times_df.R b/R/get_surv_times_df.R index d67939014..6c74f4047 100644 --- a/R/get_surv_times_df.R +++ b/R/get_surv_times_df.R @@ -60,12 +60,7 @@ get_surv_times_df <- function(fit_km, times, conf_int = 0.95, scale = 1) { # Remove variable prefix from strata to maintain clean downstream headers # without cluttering tables with 'arm=A', 'arm=B' etc. if (!is.null(fit_km$strata)) { - strata_lst <- strsplit(sub("=", "equals", strata_levels), "equals") - strata_levels <- vapply( - strata_lst, - FUN = function(x) x[2], - FUN.VALUE = character(1) - ) + strata_levels <- sub("^[^=]+=", "", strata_levels) } df <- data.frame( diff --git a/R/tbl_survfit_times.R b/R/tbl_survfit_times.R index 0d5bb2538..e9a66b6d1 100644 --- a/R/tbl_survfit_times.R +++ b/R/tbl_survfit_times.R @@ -134,32 +134,3 @@ tbl_survfit_times <- function(surv_df) { res } -#' Add Overall Column to tbl_survfit_times (Legacy) -#' -#' @description -#' This function is maintained as a legacy method to provide a clear migration -#' path for existing scripts. Because the architecture of `tbl_survfit_times()` -#' was decoupled to separate data extraction from table rendering, it is no -#' longer possible to automatically compute unstratified overall statistics -#' from the rendered table object. -#' -#' @param x (`tbl_survfit_times`)\cr -#' A table object generated by `tbl_survfit_times()`. -#' @param ... Additional arguments passed to other methods. -#' -#' @return Defunct; throws an error with migration instructions. -#' -#' @method add_overall tbl_survfit_times -#' @export -add_overall.tbl_survfit_times <- function(x, ...) { - rlang::abort( - paste( - "`add_overall()` is defunct for `tbl_survfit_times`.", - "Since we decoupled the architecture, a user wanting an overall column", - "alongside stratified columns should simply fit an unstratified model,", - "extract the dataframe, and use standard `dplyr::bind_rows()` with", - "their stratified dataframe before pushing the combined output into", - "`tbl_survfit_times()`." - ) - ) -} diff --git a/man/add_difference_row.tbl_survfit_times.Rd b/man/add_difference_row.tbl_survfit_times.Rd deleted file mode 100644 index 4ebb9dfaa..000000000 --- a/man/add_difference_row.tbl_survfit_times.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_difference_row.R -\name{add_difference_row.tbl_survfit_times} -\alias{add_difference_row.tbl_survfit_times} -\title{Add Difference Row to tbl_survfit_times (Legacy)} -\usage{ -\method{add_difference_row}{tbl_survfit_times}(x, ...) -} -\arguments{ -\item{x}{(\code{tbl_survfit_times})\cr -A table object generated by \code{tbl_survfit_times()}.} - -\item{...}{Additional arguments passed to other methods.} -} -\value{ -Defunct; throws an error with migration instructions directing users -to \code{get_surv_diff_df()}. -} -\description{ -This function is maintained as a legacy method to provide a clear migration -path for existing scripts. Because the architecture of \code{tbl_survfit_times()} -was decoupled to separate data extraction from table rendering, it is no -longer possible to automatically compute survival differences from the -rendered table object. Users should transition to \code{get_surv_diff_df()}. -} diff --git a/man/add_overall.tbl_survfit_times.Rd b/man/add_overall.tbl_survfit_times.Rd deleted file mode 100644 index a6c0c3242..000000000 --- a/man/add_overall.tbl_survfit_times.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tbl_survfit_times.R -\name{add_overall.tbl_survfit_times} -\alias{add_overall.tbl_survfit_times} -\title{Add Overall Column to tbl_survfit_times (Legacy)} -\usage{ -\method{add_overall}{tbl_survfit_times}(x, ...) -} -\arguments{ -\item{x}{(\code{tbl_survfit_times})\cr -A table object generated by \code{tbl_survfit_times()}.} - -\item{...}{Additional arguments passed to other methods.} -} -\value{ -Defunct; throws an error with migration instructions. -} -\description{ -This function is maintained as a legacy method to provide a clear migration -path for existing scripts. Because the architecture of \code{tbl_survfit_times()} -was decoupled to separate data extraction from table rendering, it is no -longer possible to automatically compute unstratified overall statistics -from the rendered table object. -} diff --git a/man/annotate_gg_km.Rd b/man/annotate_gg_km.Rd index 1ad510882..dac9d222b 100644 --- a/man/annotate_gg_km.Rd +++ b/man/annotate_gg_km.Rd @@ -3,8 +3,6 @@ \name{annotate_gg_km} \alias{annotate_gg_km} \alias{annotate_riskdf} -\alias{annotate_surv_med} -\alias{annotate_coxph} \title{Annotate Kaplan-Meier Plot} \usage{ annotate_riskdf( @@ -15,20 +13,6 @@ annotate_riskdf( xlab = "Days", ... ) - -annotate_surv_med( - gg_plt, - surv_tbl, - table_position = c(x = 0.8, y = 0.85, w = 0.32, h = 0.16), - ... -) - -annotate_coxph( - gg_plt, - coxph_tbl, - table_position = c(x = 0.29, y = 0.51, w = 0.4, h = 0.125), - ... -) } \arguments{ \item{gg_plt}{(\code{ggplot2} or \code{cowplot})\cr @@ -69,25 +53,19 @@ inside the annotation box. Default is \code{10}. A data frame containing the pre-calculated survival summary results, such as the output from \code{get_surv_times_df()}.} +\item{coxph_tbl}{(\code{data.frame})\cr +A data frame containing the pre-calculated Cox-PH results, derived +using function \code{get_cox_pairwise_df()}.} + \item{table_position}{(\code{numeric})\cr A named numeric vector \code{c(x, y, w, h)} defining the position and size of the floating table. \code{x} and \code{y} are the coordinates (0 to 1), while \code{w} and \code{h} represent width and height (0 to 1). Defaults vary by function.} - -\item{coxph_tbl}{(\code{data.frame})\cr -A data frame containing the pre-calculated Cox-PH results, derived -using function \code{get_cox_pairwise_df()}.} } \value{ The function \code{annotate_riskdf} returns a \code{cowplot} object combining the KM plot and the 'Numbers at Risk' table. - -The function \code{annotate_surv_med} returns a \code{cowplot} object\cr -with the survival table annotation added. - -The function \code{annotate_coxph} returns a \code{cowplot} object\cr -with the Cox-PH table annotation added. } \description{ These functions provide capabilities to annotate Kaplan-Meier plots (\code{\link[=gg_km]{gg_km()}}) @@ -103,12 +81,6 @@ at Risk" table below a Kaplan-Meier plot using \code{patchwork}.\cr object (not a combined \code{cowplot} object) because it requires exact X-axis extraction. -\item \code{annotate_surv_med()}: The \code{annotate_surv_med} function adds a -survival summary table as an annotation box. - -\item \code{annotate_coxph()}: The function \code{annotate_coxph()} adds a Cox -Proportional Hazards summary table as an annotation box. - }} \examples{ \dontshow{if (requireNamespace("survival", quietly = TRUE)) withAutoprint(\{ # examplesIf} @@ -134,20 +106,6 @@ use_lung2$arm <- factor(use_lung2$arm, levels = c("C", "B", "A")) fit_kmg01 <- survival::survfit(formula, use_lung2) annotate_riskdf(plt_kmg01, fit_kmg01) # rerun gg_km to change legend order -# Annotate Kaplan-Meier Plot with Survival Times Table -surv_df <- get_surv_times_df(fit_kmg01, times = c(100, 200)) -annotate_surv_med(plt_kmg01, surv_tbl = surv_df) - -# Annotate Kaplan-Meier Plot with Cox-PH Table -coxph_tbl <- get_cox_pairwise_df( - formula, - data = use_lung, arm = "arm", ref_group = "A" -) -result <- annotate_coxph(plt_kmg01, coxph_tbl) - -# Extract original plots from any annotated result -attr(result, "plotlist")$main - } \seealso{ \code{\link[=gg_km]{gg_km()}}, \code{\link[=process_survfit]{process_survfit()}}, and \code{\link[=get_cox_pairwise_df]{get_cox_pairwise_df()}} for diff --git a/man/deprecated.Rd b/man/deprecated.Rd index 5a1d0f660..b79daef25 100644 --- a/man/deprecated.Rd +++ b/man/deprecated.Rd @@ -6,6 +6,8 @@ \alias{g_lineplot} \alias{g_lineplot_table} \alias{preprocess_lineplot_data} +\alias{add_overall.tbl_survfit_times} +\alias{add_difference_row.tbl_survfit_times} \title{Deprecated functions} \usage{ tbl_demographics(..., nonmissing = "always") @@ -15,6 +17,10 @@ g_lineplot(...) g_lineplot_table(...) preprocess_lineplot_data(...) + +\method{add_overall}{tbl_survfit_times}(x, ...) + +\method{add_difference_row}{tbl_survfit_times}(x, ...) } \value{ Warnings diff --git a/tests/testthat/test-add_difference_row.R b/tests/testthat/test-add_difference_row.R deleted file mode 100644 index 7008fb326..000000000 --- a/tests/testthat/test-add_difference_row.R +++ /dev/null @@ -1,23 +0,0 @@ -# tests/testthat/test-add_difference_row.R - -test_that("add_difference_row.tbl_survfit_times is defunct and throws correct error", { - # Why: We need a minimal mock object with the correct class to ensure S3 - # method dispatch correctly routes the call to our specific legacy function. - mock_tbl <- structure(list(), class = c("tbl_survfit_times", "gtsummary")) - - # Why: Since the function's sole purpose is to prevent use and guide migration, - # we test that it successfully aborts and that the error message explicitly - # mentions the new extractor function (get_surv_diff_df). - # Note: Parentheses in the regex must be escaped. - expect_error( - add_difference_row(mock_tbl), - regexp = "Please use the new `get_surv_diff_df\\(\\)` function" - ) - - # Why: Verifying the first part of the error message to ensure the user knows - # exactly which function is defunct. - expect_error( - add_difference_row(mock_tbl), - regexp = "`add_difference_row\\(\\)` is defunct for `tbl_survfit_times`" - ) -}) From 8fa67111ea7b52cb7a5aa151df9bfa63f395074f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Thu, 18 Jun 2026 13:03:05 +0200 Subject: [PATCH 14/16] fix test for survfit_times --- tests/testthat/test-tbl_survfit_times.R | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/tests/testthat/test-tbl_survfit_times.R b/tests/testthat/test-tbl_survfit_times.R index 2ca6a25f3..0813134b9 100644 --- a/tests/testthat/test-tbl_survfit_times.R +++ b/tests/testthat/test-tbl_survfit_times.R @@ -72,25 +72,6 @@ test_that("tbl_survfit_times() catches invalid inputs", { ) }) -test_that("add_overall.tbl_survfit_times() acts as a legacy guard", { - fit <- survival::survfit( - survival::Surv(AVAL, 1 - CNSR) ~ TRTA, - data = cards::ADTTE - ) - df <- get_surv_times_df(fit, times = 30) - tbl <- tbl_survfit_times(df) - - # Check that the class was properly assigned - expect_s3_class(tbl, "tbl_survfit_times") - - # Check that calling add_overall() throws our informative migration error - expect_error( - gtsummary::add_overall(tbl), - "`add_overall()` is defunct for `tbl_survfit_times`", - fixed = TRUE - ) -}) - test_that("tbl_survfit_times correctly renders the p-value row from combined data", { # 1. Setup data and fit the model surv_data <- survival::lung From 0120c6b9d6bc93136f5e7b3ac9ca05a9dee3ebb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Thu, 18 Jun 2026 13:36:10 +0200 Subject: [PATCH 15/16] updating docs and annotate_gg_km --- NAMESPACE | 2 ++ R/annotate_gg_km.R | 76 +++++++++++++++++++------------------------ man/annotate_gg_km.Rd | 46 ++++++++++++-------------- 3 files changed, 55 insertions(+), 69 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 02a847b79..175aabdd4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,9 +16,11 @@ export(add_grade_column) export(add_hierarchical_count_row) export(add_overall) export(adjust_stat_columns_wrap) +export(annotate_coxph) export(annotate_lineplot_df) export(annotate_pkc_df) export(annotate_riskdf) +export(annotate_surv_med) export(ard_tabulate_abnormal_by_baseline) export(df_add_poolings) export(filter_hierarchical) diff --git a/R/annotate_gg_km.R b/R/annotate_gg_km.R index f9e274361..c814d7678 100644 --- a/R/annotate_gg_km.R +++ b/R/annotate_gg_km.R @@ -66,25 +66,7 @@ #' @name annotate_gg_km NULL -#' @describeIn annotate_gg_km The function `annotate_riskdf` adds a "Numbers -#' at Risk" table below a Kaplan-Meier plot using `patchwork`.\cr -#' **Note:** For this specific function, `gg_plt` must be a pure `ggplot2` -#' object (not a combined `cowplot` object) because it requires exact X-axis -#' extraction. -#' -#' @return The function `annotate_riskdf` returns a `cowplot` object combining -#' the KM plot and the 'Numbers at Risk' table. -#' -#' @examples -#' # Annotate Plot with Numbers at Risk Table -#' annotate_riskdf(plt_kmg01, fit_kmg01) -#' -#' # Change order of y-axis (arm) -#' use_lung2 <- use_lung -#' use_lung2$arm <- factor(use_lung2$arm, levels = c("C", "B", "A")) -#' fit_kmg01 <- survival::survfit(formula, use_lung2) -#' annotate_riskdf(plt_kmg01, fit_kmg01) # rerun gg_km to change legend order -#' +#' @rdname annotate_gg_km #' @export annotate_riskdf <- function(gg_plt, fit_km, @@ -135,15 +117,8 @@ annotate_riskdf <- function(gg_plt, strata = strata_levels ) } else { - strata_lst <- strsplit( - sub("=", "equals", levels(annot_tbl$strata)), - "equals" - ) - levels(annot_tbl$strata) <- matrix( - unlist(strata_lst), - ncol = 2, - byrow = TRUE - )[, 2] + # Utilizing base R regex to safely strip variable names from strata levels + levels(annot_tbl$strata) <- sub("^[^=]+=", "", levels(annot_tbl$strata)) data.frame( n.risk = annot_tbl$n.risk, @@ -183,20 +158,6 @@ annotate_riskdf <- function(gg_plt, res } -annotate_surv_med <- function(gg_plt, surv_tbl, - table_position = c(x = 0.8, y = 0.85, w = 0.32, h = 0.16), - ...) { - eargs <- utils::modifyList(list(font_size = 10, fill = TRUE), list(...)) - .add_floating_annotation(gg_plt, surv_tbl, table_position, eargs) -} - -annotate_coxph <- function(gg_plt, coxph_tbl, - table_position = c(x = 0.29, y = 0.51, w = 0.4, h = 0.125), - ...) { - eargs <- utils::modifyList(list(font_size = 10, fill = TRUE), list(...)) - .add_floating_annotation(gg_plt, coxph_tbl, table_position, eargs) -} - #' Internal helper for floating table annotations #' @keywords internal #' @noRd @@ -209,7 +170,6 @@ annotate_coxph <- function(gg_plt, coxph_tbl, rlang::abort("Input table must be a data.frame.") } - # Abstracted rowname check if (!identical(rownames(tbl), as.character(seq_len(nrow(tbl))))) { tbl <- data.frame(" " = rownames(tbl), tbl, check.names = FALSE) } @@ -228,3 +188,33 @@ annotate_coxph <- function(gg_plt, coxph_tbl, bg_fill = bg_fill ) } + +#' @rdname annotate_gg_km +#' @export +annotate_surv_med <- function(gg_plt, + surv_tbl, + table_position = c( + x = 0.8, + y = 0.85, + w = 0.32, + h = 0.16 + ), + ...) { + eargs <- utils::modifyList(list(font_size = 10, fill = TRUE), list(...)) + .add_floating_annotation(gg_plt, surv_tbl, table_position, eargs) +} + +#' @rdname annotate_gg_km +#' @export +annotate_coxph <- function(gg_plt, + coxph_tbl, + table_position = c( + x = 0.29, + y = 0.51, + w = 0.4, + h = 0.125 + ), + ...) { + eargs <- utils::modifyList(list(font_size = 10, fill = TRUE), list(...)) + .add_floating_annotation(gg_plt, coxph_tbl, table_position, eargs) +} \ No newline at end of file diff --git a/man/annotate_gg_km.Rd b/man/annotate_gg_km.Rd index dac9d222b..4b90f34e0 100644 --- a/man/annotate_gg_km.Rd +++ b/man/annotate_gg_km.Rd @@ -3,6 +3,8 @@ \name{annotate_gg_km} \alias{annotate_gg_km} \alias{annotate_riskdf} +\alias{annotate_surv_med} +\alias{annotate_coxph} \title{Annotate Kaplan-Meier Plot} \usage{ annotate_riskdf( @@ -13,6 +15,20 @@ annotate_riskdf( xlab = "Days", ... ) + +annotate_surv_med( + gg_plt, + surv_tbl, + table_position = c(x = 0.8, y = 0.85, w = 0.32, h = 0.16), + ... +) + +annotate_coxph( + gg_plt, + coxph_tbl, + table_position = c(x = 0.29, y = 0.51, w = 0.4, h = 0.125), + ... +) } \arguments{ \item{gg_plt}{(\code{ggplot2} or \code{cowplot})\cr @@ -53,19 +69,15 @@ inside the annotation box. Default is \code{10}. A data frame containing the pre-calculated survival summary results, such as the output from \code{get_surv_times_df()}.} -\item{coxph_tbl}{(\code{data.frame})\cr -A data frame containing the pre-calculated Cox-PH results, derived -using function \code{get_cox_pairwise_df()}.} - \item{table_position}{(\code{numeric})\cr A named numeric vector \code{c(x, y, w, h)} defining the position and size of the floating table. \code{x} and \code{y} are the coordinates (0 to 1), while \code{w} and \code{h} represent width and height (0 to 1). Defaults vary by function.} -} -\value{ -The function \code{annotate_riskdf} returns a \code{cowplot} object combining -the KM plot and the 'Numbers at Risk' table. + +\item{coxph_tbl}{(\code{data.frame})\cr +A data frame containing the pre-calculated Cox-PH results, derived +using function \code{get_cox_pairwise_df()}.} } \description{ These functions provide capabilities to annotate Kaplan-Meier plots (\code{\link[=gg_km]{gg_km()}}) @@ -73,15 +85,6 @@ with additional summary tables, including median survival times, numbers at risk, and cox proportional hazards results. The annotations are added using the \code{cowplot} package for flexible placement. } -\section{Functions}{ -\itemize{ -\item \code{annotate_riskdf()}: The function \code{annotate_riskdf} adds a "Numbers -at Risk" table below a Kaplan-Meier plot using \code{patchwork}.\cr -\strong{Note:} For this specific function, \code{gg_plt} must be a pure \code{ggplot2} -object (not a combined \code{cowplot} object) because it requires exact X-axis -extraction. - -}} \examples{ \dontshow{if (requireNamespace("survival", quietly = TRUE)) withAutoprint(\{ # examplesIf} # Preparing the Kaplan-Meier Plot @@ -97,15 +100,6 @@ surv_plot_data <- process_survfit(fit_kmg01) plt_kmg01 <- gg_km(surv_plot_data) \dontshow{\}) # examplesIf} -# Annotate Plot with Numbers at Risk Table -annotate_riskdf(plt_kmg01, fit_kmg01) - -# Change order of y-axis (arm) -use_lung2 <- use_lung -use_lung2$arm <- factor(use_lung2$arm, levels = c("C", "B", "A")) -fit_kmg01 <- survival::survfit(formula, use_lung2) -annotate_riskdf(plt_kmg01, fit_kmg01) # rerun gg_km to change legend order - } \seealso{ \code{\link[=gg_km]{gg_km()}}, \code{\link[=process_survfit]{process_survfit()}}, and \code{\link[=get_cox_pairwise_df]{get_cox_pairwise_df()}} for From 0fc082a3091fd21715eb05f69d33376cd716b58f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Thu, 18 Jun 2026 14:44:17 +0200 Subject: [PATCH 16/16] style --- R/annotate_gg_km.R | 2 +- R/tbl_survfit_times.R | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/R/annotate_gg_km.R b/R/annotate_gg_km.R index c814d7678..6d09fddac 100644 --- a/R/annotate_gg_km.R +++ b/R/annotate_gg_km.R @@ -217,4 +217,4 @@ annotate_coxph <- function(gg_plt, ...) { eargs <- utils::modifyList(list(font_size = 10, fill = TRUE), list(...)) .add_floating_annotation(gg_plt, coxph_tbl, table_position, eargs) -} \ No newline at end of file +} diff --git a/R/tbl_survfit_times.R b/R/tbl_survfit_times.R index e9a66b6d1..a7c68cefd 100644 --- a/R/tbl_survfit_times.R +++ b/R/tbl_survfit_times.R @@ -133,4 +133,3 @@ tbl_survfit_times <- function(surv_df) { res } -