diff --git a/R/Run_Mod.R b/R/Run_Mod.R index 37ca2a16..8eb8ff14 100644 --- a/R/Run_Mod.R +++ b/R/Run_Mod.R @@ -44,7 +44,7 @@ #' Note: These objects can be large. #' @param correlated Logical; use Chapter-2 Kronecker prior across biomarkers. #' Default FALSE (independence). -#' @param file_mod_kron Path to a JAGS file for the Kronecker model.If +#' @param file_mod_kron Path to a JAGS file for the Kronecker model. If #' `correlated = TRUE` and this path does not exist, a temporary #' `model_ch2_kron.jags` is written and used automatically. #' @returns An `sr_model` class object: a subclass of [dplyr::tbl_df] that @@ -81,7 +81,8 @@ #' - An optional `"jags.post"` attribute, included when argument #' `with_post` = TRUE. #' @inheritDotParams prep_priors -#' @seealso clean_priors, prep_priors_multi_b, inits_kron, write_model_ch2_kron +#' @seealso [clean_priors()], [prep_priors_multi_b()], [inits_kron()], +#' [write_model_ch2_kron()], [build_kron_priors()] #' @export #' @example inst/examples/run_mod-examples.R run_mod <- function(data, @@ -132,28 +133,25 @@ run_mod <- function(data, # prepare data for modeline longdata <- prep_data(dl_sub) + # Prepare base priors (used by both models) + base_priors <- prep_priors( + max_antigens = longdata$n_antigen_isos, + ... + ) + # ---------- CHOOSE MODEL/PRIORS DEPENDING ON `correlated` ---------- if (!correlated) { # original (independence) behavior - priorspec <- prep_priors( - max_antigens = longdata$n_antigen_isos, ... - ) + priorspec <- base_priors model_path <- file_mod # UNCHANGED for independence init_fun <- initsfunction to_monitor <- c("y0", "y1", "t1", "alpha", "shape") } else { # CH2 behavior: Kronecker prior across biomarkers - base_priors <- prep_priors( - max_antigens = longdata$n_antigen_isos, ... - ) - base_priors <- serodynamics::clean_priors(base_priors) - - kron_priors <- serodynamics::prep_priors_multi_b( + priorspec <- build_kron_priors( + base_priors = base_priors, n_blocks = longdata$n_antigen_isos ) - B_scalar <- list(n_blocks = longdata$n_antigen_isos) - - priorspec <- c(base_priors, kron_priors, B_scalar) # Changed: use file_mod_kron when correlated = TRUE model_path <- file_mod_kron diff --git a/R/build_kron_priors.R b/R/build_kron_priors.R new file mode 100644 index 00000000..21a391f5 --- /dev/null +++ b/R/build_kron_priors.R @@ -0,0 +1,28 @@ +#' @title Build Kronecker priors specification +#' @author Kwan Ho Lee +#' @description +#' `build_kron_priors()` combines base priors with Kronecker-specific +#' hyperparameters for the correlated model. It cleans the base priors, +#' adds the Kronecker priors, and includes the scalar `n_blocks`. +#' +#' @param base_priors A [base::list()] of priors from [prep_priors()]. +#' @param n_blocks Integer scalar: number of biomarkers. +#' +#' @return A [base::list()] with cleaned base priors, Kronecker priors +#' (OmegaP, nuP, OmegaB, nuB), and n_blocks. +#' +#' @seealso [prep_priors()], [clean_priors()], [prep_priors_multi_b()] +#' @keywords internal +build_kron_priors <- function(base_priors, n_blocks) { + # Clean legacy fields from base priors + base_priors <- clean_priors(base_priors) + + # Add Kronecker hyperpriors + kron_priors <- prep_priors_multi_b(n_blocks = n_blocks) + + # Add scalar n_blocks + B_scalar <- list(n_blocks = n_blocks) + + # Combine all pieces + c(base_priors, kron_priors, B_scalar) +} diff --git a/R/prep_priors_multi_b.R b/R/prep_priors_multi_b.R index f5ae0682..4e563050 100644 --- a/R/prep_priors_multi_b.R +++ b/R/prep_priors_multi_b.R @@ -34,8 +34,9 @@ prep_priors_multi_b <- function( cli::cli_abort("`omega_p_scale` must be a numeric vector of length 5.") } if (!is.numeric(omega_b_scale) || length(omega_b_scale) != n_blocks) { - cli::cli_abort("`omega_b_scale` must be a numeric vector of length - `n_blocks`.") + cli::cli_abort( + "`omega_b_scale` must be a numeric vector of length `n_blocks`." + ) } if (!is.numeric(nu_p) || length(nu_p) != 1L) { cli::cli_abort("`nu_p` must be a numeric scalar.") diff --git a/inst/examples/examples-run_mod_kron.R b/inst/examples/examples-run_mod_kron.R deleted file mode 100644 index 05c518c7..00000000 --- a/inst/examples/examples-run_mod_kron.R +++ /dev/null @@ -1,47 +0,0 @@ -# Minimal end-to-end example (small settings for speed) - -# 1) Write the Kronecker model once -model_path <- write_model_ch2_kron(file.path(tempdir(), "model_ch2_kron.jags")) - -# 2) Simulate a tiny dataset -set.seed(926) -n_blocks <- 2 -time_grid <- c(0, 7, 14, 30) - -sd_p <- c(0.35, 0.45, 0.25, 0.30, 0.25) -R_p <- matrix(0.25, 5, 5) -diag(R_p) <- 1 -sigma_p <- diag(sd_p) %*% R_p %*% diag(sd_p) - -R_b <- matrix(c(1, 0.5, - 0.5, 1), n_blocks, n_blocks, byrow = TRUE) -sd_b <- rep(0.6, n_blocks) -sigma_b <- diag(sd_b) %*% R_b %*% diag(sd_b) - -sim <- simulate_multi_b_long( - n_id = 5, - n_blocks = n_blocks, - time_grid = time_grid, - sigma_p = sigma_p, - sigma_b = sigma_b -) - -# 3) Convert to case_data expected by prep_data/run_mod -long_tbl <- serodynamics::as_case_data( - sim$data, - id_var = "Subject", - biomarker_var = "antigen_iso", - value_var = "value", - time_in_days = "time_days" -) - -# 4) Fit the Kronecker model (very small MCMC for demonstration) -if (interactive()) { - out <- run_mod_kron( - data = long_tbl, - file_mod = model_path, - nchain = 2, nadapt = 200, nburn = 200, - nmc = 200, niter = 2000, - strat = NA - ) -} diff --git a/man/build_kron_priors.Rd b/man/build_kron_priors.Rd new file mode 100644 index 00000000..8869dc47 --- /dev/null +++ b/man/build_kron_priors.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build_kron_priors.R +\name{build_kron_priors} +\alias{build_kron_priors} +\title{Build Kronecker priors specification} +\usage{ +build_kron_priors(base_priors, n_blocks) +} +\arguments{ +\item{base_priors}{A \code{\link[base:list]{base::list()}} of priors from \code{\link[=prep_priors]{prep_priors()}}.} + +\item{n_blocks}{Integer scalar: number of biomarkers.} +} +\value{ +A \code{\link[base:list]{base::list()}} with cleaned base priors, Kronecker priors +(OmegaP, nuP, OmegaB, nuB), and n_blocks. +} +\description{ +\code{build_kron_priors()} combines base priors with Kronecker-specific +hyperparameters for the correlated model. It cleans the base priors, +adds the Kronecker priors, and includes the scalar \code{n_blocks}. +} +\seealso{ +\code{\link[=prep_priors]{prep_priors()}}, \code{\link[=clean_priors]{clean_priors()}}, \code{\link[=prep_priors_multi_b]{prep_priors_multi_b()}} +} +\author{ +Kwan Ho Lee +} +\keyword{internal} diff --git a/man/run_mod.Rd b/man/run_mod.Rd index cb4c3a4c..2315aab3 100644 --- a/man/run_mod.Rd +++ b/man/run_mod.Rd @@ -97,7 +97,7 @@ If specified, must be 2 values long: \item{correlated}{Logical; use Chapter-2 Kronecker prior across biomarkers. Default FALSE (independence).} -\item{file_mod_kron}{Path to a JAGS file for the Kronecker model.If +\item{file_mod_kron}{Path to a JAGS file for the Kronecker model. If \code{correlated = TRUE} and this path does not exist, a temporary \code{model_ch2_kron.jags} is written and used automatically.} } @@ -245,7 +245,8 @@ if (!is.element(runjags::findjags(), c("", NULL))) { # } } \seealso{ -clean_priors, prep_priors_multi_b, inits_kron, write_model_ch2_kron +\code{\link[=clean_priors]{clean_priors()}}, \code{\link[=prep_priors_multi_b]{prep_priors_multi_b()}}, \code{\link[=inits_kron]{inits_kron()}}, +\code{\link[=write_model_ch2_kron]{write_model_ch2_kron()}}, \code{\link[=build_kron_priors]{build_kron_priors()}} } \author{ Sam Schildhauer diff --git a/vignettes/articles/Chapter2.qmd b/vignettes/articles/Chapter2.qmd index 29c9308b..2ea4db19 100644 --- a/vignettes/articles/Chapter2.qmd +++ b/vignettes/articles/Chapter2.qmd @@ -35,7 +35,17 @@ library(Matrix) # kronecker library(serodynamics) ``` -# Step B — Minimal helpers +# Step B — Helper functions (now in package) + +All helper functions for this chapter have been moved to the `serodynamics` package: + +- `simulate_multi_b_long()`: Simulates correlated multi-biomarker data with Kronecker covariance +- `prep_priors_multi_b()`: Prepares Wishart hyperparameters for the Kronecker model +- `write_model_ch2_kron()`: Writes the JAGS model file with Kronecker precision +- `clean_priors()`: Removes fields like `omega`, `wishdf`, `prec.par` that conflict with Kronecker priors +- `build_kron_priors()`: Internal helper that combines base and Kronecker priors + +These functions are documented and tested. See `?simulate_multi_b_long` for examples. # Step C — Choose a “truth” and simulate fake data @@ -123,103 +133,194 @@ $$ ## E.1 Priors for the Correlated Model -We define a helper function `prep_priors_multiB()` that sets priors for both $\Sigma_P$ (within-biomarker) and $\Sigma_B$ (across-biomarkers). +The function `prep_priors_multi_b()` sets Wishart priors for both precision matrices: -- $\text{T}_P \sim \text{Wishart}(\Omega_P, \nu_P)$\ -- $\text{T}_B \sim \text{Wishart}(\Omega_B, \nu_B)$\ -- Kronecker precision: $\text{T} = \text{T}_B \otimes \text{T}_P$ +**Within-biomarker precision** (5×5 for the 5 parameters per biomarker): +$$\text{T}_P \sim \text{Wishart}(\Omega_P, \nu_P)$$ +**Across-biomarker precision** (B×B for B biomarkers): +$$\text{T}_B \sim \text{Wishart}(\Omega_B, \nu_B)$$ -## E.2 Write the new JAGS model file (Kronecker precision) +**Kronecker structure** combines them: +$$\text{T} = \text{T}_B \otimes \text{T}_P$$ -This is our `model.jags` with only one conceptual change:\ -instead of independent +This is a $(5B) \times (5B)$ precision matrix that captures both within and across-biomarker correlations. -$$ -par[\text{subj},\text{b},] \sim \mathcal{N}( \mu.par[\text{b},], \ \text{prec.par}[\text{b},,] ) -$$ -where b is `cur_antigen_iso` +### Default hyperparameters -we draw **all biomarkers at once** for a subject with a **Kronecker precision**: +```r +prep_priors_multi_b( + n_blocks = 2, # Number of biomarkers + omega_p_scale = rep(0.1, 5), # Diffuse for within-biomarker + nu_p = 6, # Just above dimension (weakly informative) + omega_b_scale = rep(1.0, 2), # Scale for across-biomarker + nu_b = 3 # n_blocks + 1 (minimally informative) +) +``` + +These defaults are weakly informative, allowing the data to dominate. See `?prep_priors_multi_b` for details. -$$ -\mathrm{vec}(par_{\text{subj},\cdot,\cdot}) \sim \mathcal{N}\!\big( \mathrm{vec}(\mu_{par}), \ \text{T}_B \otimes \text{T}_P \big). -$$ -- Everything else (transforms, likelihood, measurement precisions) stays as before.\ -- We keep our hyperpriors for `mu.par` (the per-biomarker means), so it plugs right into our current `prep_priors()`. +## E.2 The JAGS model with Kronecker precision -This is our new `model.jags` rename as `model_ch2_kron.jags` +The function `write_model_ch2_kron()` creates a JAGS model file that differs from our baseline in one key way: -### What changed vs. our current `model.jags` +**Baseline model** (independence): Each biomarker gets its own precision matrix +$$\text{par}[\text{subj},b,] \sim \mathcal{N}( \mu_{\text{par}}[b,], \ \text{prec.par}[b,,] )$$ +where biomarkers are independent. + +**Kronecker model** (correlated): All biomarkers for a subject are drawn jointly +$$\mathrm{vec}(\text{par}_{\text{subj},\cdot,\cdot}) \sim \mathcal{N}\!\big( \mathrm{vec}(\mu_{\text{par}}), \ \text{T}_B \otimes \text{T}_P \big)$$ +allowing correlation across biomarkers. + +Everything else remains unchanged: +- Same transforms from parameters to natural scale +- Same likelihood for observed antibody data +- Same hyperpriors on `mu.par` (parameter means) + +The model file is written automatically when you call `run_mod(correlated = TRUE)`, or you can create it explicitly: + +```r +# Optional: write model file to inspect +model_path <- write_model_ch2_kron("model_ch2_kron.jags") +``` + +## E.3: Using the unified `run_mod()` function + +The correlated model functionality is integrated into `run_mod()`. Set `correlated = TRUE` to enable the Kronecker prior: + +```r +# Independence model (default behavior) +fit_indep <- run_mod(data = sim_tbl, ...) + +# Correlated model (Kronecker prior) +fit_kron <- run_mod(data = sim_tbl, correlated = TRUE, ...) +``` -- Removed the per-biomarker `prec.par[cur_antigen_iso,,] ~ dwish(...)` and `par[subj,cur_antigen_iso,] ~ dmnorm(mu.par[cur_antigen_iso,], prec.par[cur_antigen_iso,,])`. +When `correlated = TRUE`, `run_mod()` internally: -- Replaced with one prior per subject on the stacked vector using $\text{T} = \text{T}_B \otimes \text{T}_P$ +1. Calls `build_kron_priors()` to combine base priors with Kronecker hyperparameters +2. Writes or uses `file_mod_kron` for the JAGS model with Kronecker structure +3. Uses `inits_kron()` for proper initialization +4. Monitors additional parameters: `TauB` and `TauP` (the precision matrices) -- Kept our `mu.par` prior and the likelihood exactly as is. -## E.3: Minimal wrapper so we can keep calling one function +# Understanding the Wishart Hyperparameters +The Wishart distribution is the multivariate generalization of the gamma distribution, commonly used as a prior for precision matrices (inverse covariances). -# What are `OmegaP`, `nuP`, `OmegaB`, `nuB` — and why these defaults? +## What are OmegaP, nuP, OmegaB, nuB? -These are the **Wishart hyperparameters** for the **precision matrices** (inverse covariances) used in the Kronecker prior: +For a Wishart distribution $\text{T} \sim \text{Wishart}(\Omega, \nu)$: -- $\text{T}_P \sim \text{Wishart}(\Omega_P, \nu_P)$ -- within-biomarker parameter precision (5×5). -- $\text{T}_B \sim \text{Wishart}(\Omega_B, \nu_B)$ -- across-biomarker precision (B×B). +- **$\Omega$** (scale matrix): Determines the expected scale of the precision. Smaller diagonal entries → higher expected precision → tighter covariance +- **$\nu$** (degrees of freedom): Controls how informative the prior is. Larger $\nu$ → more concentrated around the expected value -Generally speaking (JAGS Wishart): $\mathop{\mathbb{E}}[\text{T}]\approx \nu \cdot \Omega^{-1}$ when $\nu$ is not tiny. So smaller diagonal entries in $\Omega$ imply larger expected precision (i.e., smaller covariance), and vice versa. +In JAGS notation: $\mathop{\mathbb{E}}[\text{T}] \approx \nu \cdot \Omega^{-1}$ when $\nu$ exceeds the dimension of the matrix. -## Chosen weakly-informative defaults +## Our Default Choices -``` r -OmegaP_scale = rep(0.1, 5); nuP = 6 -OmegaB_scale = rep(1.0, B); nuB = B + 1 +**Within-biomarker precision** ($\text{T}_P$, 5×5): +```r +OmegaP = diag(rep(0.1, 5)) # Small scale = diffuse prior +nuP = 6 # Just above dimension (5) ``` -- `nuP = 6` is just above the dimension (5): proper but not tight. +**Across-biomarker precision** ($\text{T}_B$, B×B): +```r +OmegaB = diag(rep(1.0, B)) # Moderate scale +nuB = B + 1 # Minimally informative +``` -- `OmegaP = 0.1 * I_5` is diffuse. With small `nuP`, the prior is wide; data will dominate. +These defaults are **weakly informative**: they allow the data to dominate while ensuring the prior is proper (integrable). -- `nuB = B + 1` is a minimally-informative choice for a B×B Wishart. +## When to customize -- `OmegaB = I_B` centers `TauB` near identity while letting the data learn cross-biomarker correlation. +Consider adjusting these if: +- You have strong prior information about parameter correlations +- Convergence is poor (try slightly more informative priors) +- You want to test sensitivity to prior choice -These are starting values. Validate with prior predictive checks (simulate parameters → curves → sanity check ranges). +**Recommendation**: Start with defaults, then validate with prior predictive checks (simulate parameters → curves → sanity check ranges). -# Putting it together +# Putting it all together -- **Independence model (our baseline)**: no changes. +The complete workflow compares two models: -- **Correlated model**: we supply both the usual priors and the new Kronecker priors: +## Independence Model (Chapter 1 / Baseline) ```{r} #| echo: true #| eval: false -#| code-fold: true -#| code-summary: "Run Kronecker model (disabled for now)" -# Step 1: simulate fake data (or load real Shigella data) -sim_tbl +# Fit with default independence prior +fit_indep <- run_mod( + data = sim_tbl, + nchain = 4, + nadapt = 1000, + nburn = 500, + nmc = 500, + niter = 5000, + strat = NA +) +``` + +## Correlated Model (Chapter 2 / Kronecker) -# Step 2: write the new Kronecker model file (once per session/project) -write_model_ch2_kron() +```{r} +#| echo: true +#| eval: false -# Step 3: run the unified runner in correlated (Chapter 2) mode +# Fit with Kronecker prior across biomarkers fit_kron <- run_mod( - data = sim_tbl, - file_mod = serodynamics_example("model.jags"), # baseline model path (unused here) - file_mod_kron = "model_ch2_kron.jags", # use the Kronecker model - correlated = TRUE, # <-- key switch - nchain = 4, nadapt = 1000, nburn = 500, - nmc = 500, niter = 5000, - strat = NA, - mu_hyp_param = c(1.0, 7.0, 1.0, -4.0, -1.0) # optional override + data = sim_tbl, + correlated = TRUE, # <-- Enable Kronecker prior + nchain = 4, + nadapt = 1000, + nburn = 500, + nmc = 500, + niter = 5000, + strat = NA ) -# Step 4: inspect results (same as with run_mod before) +# Optionally override default Kronecker hyperparameters +fit_kron_custom <- run_mod( + data = sim_tbl, + correlated = TRUE, + nchain = 4, + nadapt = 1000, + nburn = 500, + nmc = 500, + niter = 5000, + strat = NA, + # Pass through to prep_priors_multi_b(): + omega_p_scale = rep(0.2, 5), # Custom within-biomarker scale + nu_p = 7, # Custom degrees of freedom + omega_b_scale = rep(1.5, 2), # Custom across-biomarker scale + nu_b = 4 # Custom degrees of freedom +) +``` + +## Comparing Results + +Both models return the same `sr_model` object structure, so downstream analysis is identical: + +```{r} +#| echo: true +#| eval: false + +# Inspect posterior summaries +fit_indep fit_kron +# Plot predictions +plot_predicted_curve(fit_indep, sim_tbl) +plot_predicted_curve(fit_kron, sim_tbl) + +# For the Kronecker model, you can also examine the precision matrices +library(ggmcmc) +fit_kron |> + filter(Parameter %in% c("TauB[1,1]", "TauB[1,2]", "TauP[1,1]")) |> + ggs_density() ```