From d2ae8cec9fa9f28da79b1172ac73663b93c7d6e5 Mon Sep 17 00:00:00 2001 From: wpetry Date: Fri, 7 Feb 2025 17:42:00 -0500 Subject: [PATCH 01/17] re-factoring arguments for modular construction of model --- R/fit_model.R | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/R/fit_model.R b/R/fit_model.R index 7841bbc..fa073bc 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -2,15 +2,17 @@ #' #' @param data A named list containing the required data, as obtained from #' [prepare_data()]. -#' @param model character. One of "Young2021", "sampling_effort", or -#' "varying_preferences", or a path to a file describing the Stan model in case -#' you want to use a modified Stan model. +#' @param vary_plant_effort Whether to account for varying sampling effort across plants. If FALSE (default), all plants are assumed to be sampled equally. If TRUE, sampling effort for each species must be supplied in the data. +#' @param vary_animal_pref Whether to allow animals to have different preference strengths for linked plants. If FALSE (default), a single preference parameter is estimated for all animals. If TRUE, separate preference parameters are fit for each animal. +#' @param plant_abun How model considers plant abundances. "estimate" (default) estimates the latent relative abundance of each plant type. "relative" allows the user to pass a simplex containing the relative abundances of each plant. "absolute" allows the user to supply the absolute abundances of each plant. +#' @param animal_abun How model considers animal abundances. "estimate" (default) estimates the latent relative abundance of each animal type. "relative" allows the user to pass a simplex containing the relative abundances of each animal. "absolute" allows the user to supply the absolute abundances of each animal. #' @param beta Rate of exponential prior on `r` (preference) parameter. +#' @param alpha_sigma Concentration parameter of Dirichlet prior on `sigma` (relative abundances of plants). Only used when `plant_abun = "estimate"`. +#' @param alpha_tau Concentration parameter of Dirichlet prior on `tau` (relative abundances of animals). Only used when `animal_abun = "estimate"`. #' Default beta is 0.01. Increase it if you have large count numbers #' (can examine the resultant prior using [plot_prior()]). -#' @param ... Further arguments for [cmdstanr::sample()], like `iter_warmup`, -#' `iter_sampling`, or `thin`, among others. It is recommended to increase the -#' number of iterations (e.g. iter_sampling = 10000). +#' @param ... Further arguments passed to [cmdstanr::sample()], like `iter_warmup`, +#' `iter_sampling`, or `thin`, among others. #' #' @return A fitted model ([cmdstanr::CmdStanMCMC()] object). #' @export @@ -22,8 +24,13 @@ #' fit <- fit_model(dt) fit_model <- function(data = NULL, - model = c("sampling_effort", "Young2021", "varying_preferences"), + vary_plant_effort = FALSE, + vary_animal_pref = FALSE, + plant_abun = c("estimate", "relative", "absolute"), + animal_abun = c("estimate", "relative", "absolute"), beta = 0.01, + alpha_sigma = 1, + alpha_tau = 1, ...) { if (is.null(data)) { From 6c70e4e27c9e583fd1ff905c3c21e5b6a5e7c5f6 Mon Sep 17 00:00:00 2001 From: wpetry Date: Mon, 10 Feb 2025 08:45:43 -0500 Subject: [PATCH 02/17] add modular skeleton for model generalization --- NAMESPACE | 1 + R/fit_model.R | 69 +++++++++++------- R/stancode.R | 178 +++++++++++++++++++++++++++++++++++++++++++++++ man/fit_model.Rd | 41 +++++++---- man/stancode.Rd | 24 +++++++ 5 files changed, 276 insertions(+), 37 deletions(-) create mode 100644 R/stancode.R create mode 100644 man/stancode.Rd diff --git a/NAMESPACE b/NAMESPACE index f0ce0b1..2139eb7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,4 +12,5 @@ export(plot_prior) export(plot_residuals) export(predict_counts) export(prepare_data) +export(stancode) import(cmdstanr) diff --git a/R/fit_model.R b/R/fit_model.R index fa073bc..fc026ab 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -1,16 +1,14 @@ -#' Fit model +#' Fit bipartite network model #' #' @param data A named list containing the required data, as obtained from #' [prepare_data()]. -#' @param vary_plant_effort Whether to account for varying sampling effort across plants. If FALSE (default), all plants are assumed to be sampled equally. If TRUE, sampling effort for each species must be supplied in the data. -#' @param vary_animal_pref Whether to allow animals to have different preference strengths for linked plants. If FALSE (default), a single preference parameter is estimated for all animals. If TRUE, separate preference parameters are fit for each animal. -#' @param plant_abun How model considers plant abundances. "estimate" (default) estimates the latent relative abundance of each plant type. "relative" allows the user to pass a simplex containing the relative abundances of each plant. "absolute" allows the user to supply the absolute abundances of each plant. -#' @param animal_abun How model considers animal abundances. "estimate" (default) estimates the latent relative abundance of each animal type. "relative" allows the user to pass a simplex containing the relative abundances of each animal. "absolute" allows the user to supply the absolute abundances of each animal. -#' @param beta Rate of exponential prior on `r` (preference) parameter. -#' @param alpha_sigma Concentration parameter of Dirichlet prior on `sigma` (relative abundances of plants). Only used when `plant_abun = "estimate"`. -#' @param alpha_tau Concentration parameter of Dirichlet prior on `tau` (relative abundances of animals). Only used when `animal_abun = "estimate"`. -#' Default beta is 0.01. Increase it if you have large count numbers -#' (can examine the resultant prior using [plot_prior()]). +#' @param plant_effort The sampling effort allocated to each plant. If unknown, `param` (default) estimates a single parameter during the modelling step that applies to all plants. Alternatively, "data" allows measured values of sampling effort to be supplied in `data`. +#' @param animal_pref Controls the relative preference of animals for linked plants. `fixed` (default), fits a single preference parameter for all animals during the modelling step. `varying` fits a separate preference parameter for each animal type. +#' @param plant_abun Controls how the model accounts for plant abundances. `estimate` (default) estimates the latent relative abundance of each plant type. `relative` allows the user to pass a simplex containing the relative abundances of each plant. `absolute` allows the user to supply the absolute abundances of each plant. +#' @param animal_abun Controls how the model accounts for animal abundances. "estimate" (default) estimates the latent relative abundance of each animal type. "relative" allows the user to pass a simplex containing the relative abundances of each animal. `absolute` allows the user to supply the absolute abundances of each animal. +#' @param beta Rate parameter of exponential prior on `r` (preference) parameter. The defaults, 0.01, is suitable for small counts and should be increased for larger counts. Use `plot_prior` to visualize. +#' @param alpha_sigma Concentration parameter of Dirichlet prior on `sigma` (relative abundances of plants). Only used when `plant_abun = "estimate"`. Defaults to 1. Use `plot_prior` to visualize. +#' @param alpha_tau Concentration parameter of Dirichlet prior on `tau` (relative abundances of animals). Only used when `animal_abun = "estimate"`. Defaults to 1. Use `plot_prior` to visualize. #' @param ... Further arguments passed to [cmdstanr::sample()], like `iter_warmup`, #' `iter_sampling`, or `thin`, among others. #' @@ -18,14 +16,19 @@ #' @export #' @import cmdstanr #' +#' @details +#' This function allows fits generalized variations of the bipartite network structure model of Young et al. 2021 [](https://doi.org/10.1038%2Fs41467-021-24149-x). +#' #' @examplesIf interactive() #' data(web) #' dt <- prepare_data(mat = web, sampl.eff = rep(20, nrow(web))) -#' fit <- fit_model(dt) +#' # fit the model used by Young et al. 2021 +#' fit <- fit_model(dt, plant_effort = "param", animal_pref = "fixed", +#' plant_abun = "estimate", animal_abun = "estimate") fit_model <- function(data = NULL, - vary_plant_effort = FALSE, - vary_animal_pref = FALSE, + plant_effort = c("param", "data"), + animal_pref = c("fixed", "varying"), plant_abun = c("estimate", "relative", "absolute"), animal_abun = c("estimate", "relative", "absolute"), beta = 0.01, @@ -37,22 +40,40 @@ fit_model <- function(data = NULL, stop("Please provide the data as produced by 'prepare_data()'") } - model <- match.arg(model) # TODO: remove this later to allow for user models + plant_effort <- match.arg(plant_effort) + animal_pref <- match.arg(animal_pref) + plant_abun <- match.arg(plant_abun) + animal_abun <- match.arg(animal_abun) + vars <- names(data) - if (model %in% c("sampling_effort", "Young2021", "varying_preferences")) { - mod.stan <- system.file(paste0("models/", model, ".stan"), package = "BayesianWebs") - } else { - mod.stan <- model - } + ## Check that data provided are suitable for the specified model variant + if(plant_effort == "data" & !"C" %in% vars) stop('Sampling effort, C, must be supplied in data when plant_effort = "data"') + if(plant_abun == "relative" & !"sigma" %in% vars) stop('Plant relative abundances, sigma, must be supplied in data when plant_abun = "relative"') + if(plant_abun == "absolute" & !"sigma" %in% vars) stop('Plant absolute abundances, sigma, must be supplied in data when plant_abun = "absolute"') + if(animal_abun == "relative" & !"tau" %in% vars) stop('Animal relative abundances, tau, must be supplied in data when animal_abun = "relative"') + if(animal_abun == "absolute" & !"tau" %in% vars) stop('Animal absolute abundances, tau, must be supplied in data when animal_abun = "absolute"') + if(plant_abun == "relative" & !all.equal(data$sigma, 1)) stop("Plant relative abundances, sigma, must sum to 1.") + if(animal_abun == "relative" & !all.equal(data$tau, 1)) stop("Animal relative abundances, tau, must sum to 1.") + if(plant_abun == "absolute" & all.equal(data$sigma, 1)) warning("Plant absolute abundances, sigma, appear to be relative abundances.") + if(animal_abun == "absolute" & all.equal(data$tau, 1)) warning("Animal absolute abundances, tau, appear to be relative abundances.") + stopifnot(beta > 0, alpha_sigma > 0, alpha_tau > 0) - ## Add beta to data file - stopifnot(beta > 0) + ## Add priors to data list data$beta <- beta + data$alpha_sigma <- alpha_sigma + data$alpha_tau <- alpha_tau + ## Make Stan code + ## TODO: Pre-compile all model variants during package installation? + mod.stan <- stancode(plant_effort = plant_effort, + animal_pref = animal_pref, + plant_abun = plant_abun, + animal_abun = animal_abun) + tf <- cmdstanr::write_stan_file(mod.stan) - ## compile model - model.eff <- cmdstanr::cmdstan_model(mod.stan) + ## Compile model + model.eff <- cmdstanr::cmdstan_model(tf) + ## Fit model fit <- model.eff$sample(data, ...) - } diff --git a/R/stancode.R b/R/stancode.R new file mode 100644 index 0000000..121aaf7 --- /dev/null +++ b/R/stancode.R @@ -0,0 +1,178 @@ +#' Make Stan code +#' +#' @returns A string containing a Stan model. +#' @keywords internal +#' @export +#' +#' @details +#' This function is designed to be used internally. See `fit_model` for documentation +#' of arguments. +#' +stancode <- function(plant_effort = c("param", "data"), + animal_pref = c("fixed", "varying"), + plant_abun = c("estimate", "relative", "absolute"), + animal_abun = c("estimate", "relative", "absolute")) { + # validate arguments + plant_effort <- match.arg(plant_effort) + animal_pref <- match.arg(animal_pref) + plant_abun <- match.arg(plant_abun) + animal_abun <- match.arg(animal_abun) + + # generate data block + scode_data <- paste0( + "data {\n", + " // Dimensions of the data matrix, and matrix itself.\n", + " int n_p;\n", + " int n_a;\n", + " array[n_p, n_a] int M;\n", + switch(plant_effort, + param = "", + data = " vector [n_p] C;\n"), + " real beta;\n", + switch(plant_abun, + estimate = " real alpha_sigma;\n", + relative = " simplex[n_p] sigma;\n", + absolute = " vector sigma;\n"), + switch(animal_abun, + estimate = " real alpha_tau;\n", + relative = " simplex[n_a] tau;\n", + absolute = " vector tau;\n"), + "}" + ) + + # generate transformed data block + scode_tdata <- paste0( + "transformed data {\n", + ifelse(plant_abun == "estimate" | animal_abun == "estimate", + "// Prior parameters for abundances\n", ""), + switch(plant_abun, + estimate = " vector[n_p] a_sigma = rep_vector(alpha_sigma, n_p);\n", + relative = "", + absolute = ""), + switch(animal_abun, + estimate = " vector[n_a] a_tau = rep_vector(alpha_tau, n_a);\n", + relative = "", + absolute = ""), + "// Pre-compute the marginals of M to save computation in the model loop.\n", + " array[n_p] int M_rows = rep_array(0, n_p);\n", + " array[n_a] int M_cols = rep_array(0, n_a);\n", + " int M_tot = 0;\n", + " for (i in 1:n_p) {\n", + " for (j in 1:n_a) {\n", + " M_rows[i] += M[i, j];\n", + " M_cols[j] += M[i, j];\n", + " M_tot += M[i, j];\n", + " }\n", + " }\n", + "}" + ) + + # generate parameter block + scode_param <- paste0( + "parameters {\n", + switch(plant_effort, + param = " real C;\n", + data = ""), + switch(animal_pref, + fixed = " real r;\n", + varying = " vector [n_a] r;\n"), + switch(plant_abun, + estimate = " simplex[n_p] sigma;\n", + relative = "", + absolute = ""), + switch(animal_abun, + estimate = " simplex[n_a] tau;\n", + relative = "", + absolute = ""), + " real rho;\n", + "}" + ) + + # generate model block + scode_model <- paste0( + "model {\n", + " // Priors\n", + switch(animal_pref, + fixed = " r ~ exponential(beta);\n", + varying = paste0(" for (j in 1:n_a) {\n", + " r[j] ~ exponential(beta);\n", + " }\n")), + switch(plant_abun, + estimate = " sigma ~ dirichlet(a_sigma);\n", + relative = "", + absolute = ""), + switch(animal_abun, + estimate = " tau ~ dirichlet(a_tau);\n", + relative = "", + absolute = ""), + " // Global sums and parameters\n", + " target += M_tot * log(C) - C;\n", + " // Weighted marginals of the data matrix\n", + paste0(" for (i in 1:n_p) {\n", + " target += M_rows[i] * log(sigma[i]);\n", + " }\n", + " for (j in 1:n_a) {\n", + " target += M_cols[j] * log(tau[j]);\n", + " }\n"), + " // Pairwise loop\n", + paste0(" for (i in 1:n_p) {\n", + " for (j in 1:n_a) {\n", + " real nu_ij_0 = log(1 - rho);\n"), + paste0(" real nu_ij_1 = log(rho) + M[i,j] * log(1 + ", + switch(animal_pref, + fixed = "r", + varying = "r[j]"), + ") - ", + switch(plant_effort, + param = "C", + data = "C[i]"), + " * ", + switch(animal_pref, + fixed = "r", + varying = "r[j]"), + " * ", + "sigma[i] * tau[j];\n"), + " if (nu_ij_0 > nu_ij_1)\n", + " target += nu_ij_0 + log1p_exp(nu_ij_1 - nu_ij_0);\n", + " else\n", + " target += nu_ij_1 + log1p_exp(nu_ij_0 - nu_ij_1);\n", + " }\n", + " }\n", + "}" + ) + + # generate generated quantities block + scode_gquant <- paste0( + "generated quantities {\n", + " // Posterior edge probability matrix\n", + " array[n_p, n_a] real Q;\n", + " for (i in 1:n_p) {\n", + " for (j in 1:n_a) {\n", + " real nu_ij_0 = log(1 - rho);\n", + paste0(" real nu_ij_1 = log(rho) + M[i,j] * log(1 + ", + switch(animal_pref, + fixed = "r", + varying = "r[j]"), + ") - ", + switch(plant_effort, + param = "C", + data = "C[i]"), + " * ", + switch(animal_pref, + fixed = "r", + varying = "r[j]"), + " * sigma[i] * tau[j];\n"), + " if (nu_ij_1 > 0)\n", + " Q[i, j] = 1 / (1 + exp(nu_ij_0 - nu_ij_1));\n", + " else\n", + " Q[i, j] = exp(nu_ij_1) / (exp(nu_ij_0) + exp(nu_ij_1));\n", + " }\n", + " }\n", + "}" + ) + + # make code + scode <- paste(scode_data, scode_tdata, scode_param, scode_model, scode_gquant, + sep = "\n") + return(scode) +} diff --git a/man/fit_model.Rd b/man/fit_model.Rd index c283cf5..a024dfa 100644 --- a/man/fit_model.Rd +++ b/man/fit_model.Rd @@ -2,12 +2,17 @@ % Please edit documentation in R/fit_model.R \name{fit_model} \alias{fit_model} -\title{Fit model} +\title{Fit bipartite network model} \usage{ fit_model( data = NULL, - model = c("sampling_effort", "Young2021", "varying_preferences"), + plant_effort = c("param", "data"), + animal_pref = c("fixed", "varying"), + plant_abun = c("estimate", "relative", "absolute"), + animal_abun = c("estimate", "relative", "absolute"), beta = 0.01, + alpha_sigma = 1, + alpha_tau = 1, ... ) } @@ -15,28 +20,38 @@ fit_model( \item{data}{A named list containing the required data, as obtained from \code{\link[=prepare_data]{prepare_data()}}.} -\item{model}{character. One of "Young2021", "sampling_effort", or -"varying_preferences", or a path to a file describing the Stan model in case -you want to use a modified Stan model.} +\item{plant_effort}{The sampling effort allocated to each plant. If unknown, \code{param} (default) estimates a single parameter during the modelling step that applies to all plants. Alternatively, "data" allows measured values of sampling effort to be supplied in \code{data}.} -\item{beta}{Rate of exponential prior on \code{r} (preference) parameter. -Default beta is 0.01. Increase it if you have large count numbers -(can examine the resultant prior using \code{\link[=plot_prior]{plot_prior()}}).} +\item{animal_pref}{Controls the relative preference of animals for linked plants. \code{fixed} (default), fits a single preference parameter for all animals during the modelling step. \code{varying} fits a separate preference parameter for each animal type.} -\item{...}{Further arguments for \code{\link[cmdstanr:model-method-sample]{cmdstanr::sample()}}, like \code{iter_warmup}, -\code{iter_sampling}, or \code{thin}, among others. It is recommended to increase the -number of iterations (e.g. iter_sampling = 10000).} +\item{plant_abun}{Controls how the model accounts for plant abundances. \code{estimate} (default) estimates the latent relative abundance of each plant type. \code{relative} allows the user to pass a simplex containing the relative abundances of each plant. \code{absolute} allows the user to supply the absolute abundances of each plant.} + +\item{animal_abun}{Controls how the model accounts for animal abundances. "estimate" (default) estimates the latent relative abundance of each animal type. "relative" allows the user to pass a simplex containing the relative abundances of each animal. \code{absolute} allows the user to supply the absolute abundances of each animal.} + +\item{beta}{Rate parameter of exponential prior on \code{r} (preference) parameter. The defaults, 0.01, is suitable for small counts and should be increased for larger counts. Use \code{plot_prior} to visualize.} + +\item{alpha_sigma}{Concentration parameter of Dirichlet prior on \code{sigma} (relative abundances of plants). Only used when \code{plant_abun = "estimate"}. Defaults to 1. Use \code{plot_prior} to visualize.} + +\item{alpha_tau}{Concentration parameter of Dirichlet prior on \code{tau} (relative abundances of animals). Only used when \code{animal_abun = "estimate"}. Defaults to 1. Use \code{plot_prior} to visualize.} + +\item{...}{Further arguments passed to \code{\link[cmdstanr:model-method-sample]{cmdstanr::sample()}}, like \code{iter_warmup}, +\code{iter_sampling}, or \code{thin}, among others.} } \value{ A fitted model (\code{\link[cmdstanr:CmdStanMCMC]{cmdstanr::CmdStanMCMC()}} object). } \description{ -Fit model +Fit bipartite network model +} +\details{ +This function allows fits generalized variations of the bipartite network structure model of Young et al. 2021 \href{https://doi.org/10.1038\%2Fs41467-021-24149-x}{\url{doi:10.1038/s41467-021-24149-x}}. } \examples{ \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(web) dt <- prepare_data(mat = web, sampl.eff = rep(20, nrow(web))) -fit <- fit_model(dt) +# fit the model used by Young et al. 2021 +fit <- fit_model(dt, plant_effort = "param", animal_pref = "fixed", + plant_abun = "estimate", animal_abun = "estimate") \dontshow{\}) # examplesIf} } diff --git a/man/stancode.Rd b/man/stancode.Rd new file mode 100644 index 0000000..a59e5a7 --- /dev/null +++ b/man/stancode.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stancode.R +\name{stancode} +\alias{stancode} +\title{Make Stan code} +\usage{ +stancode( + plant_effort = c("param", "data"), + animal_pref = c("fixed", "varying"), + plant_abun = c("estimate", "relative", "absolute"), + animal_abun = c("estimate", "relative", "absolute") +) +} +\value{ +A string containing a Stan model. +} +\description{ +Make Stan code +} +\details{ +This function is designed to be used internally. See \code{fit_model} for documentation +of arguments. +} +\keyword{internal} From e1f9e7e0e5f85e9a6b22c1c9e9a1d7d27cabd328 Mon Sep 17 00:00:00 2001 From: wpetry Date: Mon, 10 Feb 2025 12:55:34 -0500 Subject: [PATCH 03/17] fix small typos; milestone: all candidate models can sample --- R/fit_model.R | 8 ++++---- R/stancode.R | 7 ++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/R/fit_model.R b/R/fit_model.R index fc026ab..0ddf5be 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -52,10 +52,10 @@ fit_model <- function(data = NULL, if(plant_abun == "absolute" & !"sigma" %in% vars) stop('Plant absolute abundances, sigma, must be supplied in data when plant_abun = "absolute"') if(animal_abun == "relative" & !"tau" %in% vars) stop('Animal relative abundances, tau, must be supplied in data when animal_abun = "relative"') if(animal_abun == "absolute" & !"tau" %in% vars) stop('Animal absolute abundances, tau, must be supplied in data when animal_abun = "absolute"') - if(plant_abun == "relative" & !all.equal(data$sigma, 1)) stop("Plant relative abundances, sigma, must sum to 1.") - if(animal_abun == "relative" & !all.equal(data$tau, 1)) stop("Animal relative abundances, tau, must sum to 1.") - if(plant_abun == "absolute" & all.equal(data$sigma, 1)) warning("Plant absolute abundances, sigma, appear to be relative abundances.") - if(animal_abun == "absolute" & all.equal(data$tau, 1)) warning("Animal absolute abundances, tau, appear to be relative abundances.") + if(plant_abun == "relative" & !all.equal(sum(data$sigma), 1)) stop("Plant relative abundances, sigma, must sum to 1.") + if(animal_abun == "relative" & !all.equal(sum(data$tau), 1)) stop("Animal relative abundances, tau, must sum to 1.") + if(plant_abun == "absolute" & all.equal(sum(data$sigma), 1)) warning("Plant absolute abundances, sigma, appear to be relative abundances.") + if(animal_abun == "absolute" & all.equal(sum(data$tau), 1)) warning("Animal absolute abundances, tau, appear to be relative abundances.") stopifnot(beta > 0, alpha_sigma > 0, alpha_tau > 0) ## Add priors to data list diff --git a/R/stancode.R b/R/stancode.R index 121aaf7..4128530 100644 --- a/R/stancode.R +++ b/R/stancode.R @@ -1,6 +1,7 @@ #' Make Stan code #' #' @returns A string containing a Stan model. +#' @author William K. Petry #' @keywords internal #' @export #' @@ -27,16 +28,16 @@ stancode <- function(plant_effort = c("param", "data"), " array[n_p, n_a] int M;\n", switch(plant_effort, param = "", - data = " vector [n_p] C;\n"), + data = " vector[n_p] C;\n"), " real beta;\n", switch(plant_abun, estimate = " real alpha_sigma;\n", relative = " simplex[n_p] sigma;\n", - absolute = " vector sigma;\n"), + absolute = " vector[n_p] sigma;\n"), switch(animal_abun, estimate = " real alpha_tau;\n", relative = " simplex[n_a] tau;\n", - absolute = " vector tau;\n"), + absolute = " vector[n_a] tau;\n"), "}" ) From d8c4b47b75bd275bce61ebb074e9f0831f44e83d Mon Sep 17 00:00:00 2001 From: wpetry Date: Mon, 10 Feb 2025 17:34:45 -0500 Subject: [PATCH 04/17] homogenize use of snake_case; add options for controlling qi point and width --- R/check_model.R | 8 +----- R/get_posterior.R | 56 +++++++++++++++++------------------- R/is_there_link.R | 5 ++-- R/plot_counts_pred.R | 16 ++++++----- R/plot_counts_pred_obs.R | 27 +++++++++-------- R/plot_interaction_prob.R | 6 ++-- R/plot_prior.R | 9 ++---- R/plot_residuals.R | 16 +++++------ R/predict_counts.R | 11 +++---- man/check_model.Rd | 2 +- man/get_posterior.Rd | 2 +- man/plot_counts_pred.Rd | 16 +++++++---- man/plot_counts_pred_obs.Rd | 23 ++++++++------- man/plot_interaction_prob.Rd | 2 +- man/plot_prior.Rd | 2 +- man/plot_residuals.Rd | 14 ++++----- man/predict_counts.Rd | 2 +- 17 files changed, 102 insertions(+), 115 deletions(-) diff --git a/R/check_model.R b/R/check_model.R index 00567c0..fbc73cf 100644 --- a/R/check_model.R +++ b/R/check_model.R @@ -9,7 +9,7 @@ #' #' @examplesIf interactive() #' data(web) -#' dt <- prepare_data(mat = web, sampl.eff = rep(20, nrow(web))) +#' dt <- prepare_data(mat = web, plant_effort = rep(20, nrow(web))) #' fit <- fit_model(dt, refresh = 0) #' check_model(fit, data = data) @@ -38,9 +38,3 @@ check_lp <- function(fit = NULL) { gg } - - - - - - diff --git a/R/get_posterior.R b/R/get_posterior.R index 7a3021b..3306c78 100644 --- a/R/get_posterior.R +++ b/R/get_posterior.R @@ -9,7 +9,7 @@ #' #' @examplesIf interactive() #' data(web) -#' dt <- prepare_data(mat = web, sampl.eff = rep(20, nrow(web))) +#' dt <- prepare_data(mat = web, plant_effort = rep(20, nrow(web))) #' fit <- fit_model(dt, refresh = 0) #' get_posterior(fit, dt, param = "connectance") #' @@ -32,31 +32,12 @@ get_posterior <- function(fit = NULL, # r = avg. visits from mutualists (preference) # rho = connectance - # sigma = plant.abund - # tau = animal.abund + # sigma = plant_abund + # tau = animal_abund # Q = interaction probability param <- match.arg(param) - params_all <- function(fit) { - if (fit$metadata()$model_name == "varying_preferences_model") { - out <- tidybayes::spread_draws(fit, rho, r[Animal], sigma[Plant], tau[Animal], Q[Plant, Animal]) - } else { - out <- tidybayes::spread_draws(fit, rho, r, sigma[Plant], tau[Animal], Q[Plant, Animal]) - } - return(out) - } - - params_preference <- function(fit) { - if (fit$metadata()$model_name == "varying_preferences_model") { - out <- tidybayes::spread_draws(fit, r[Animal]) - } else { - out <- tidybayes::spread_draws(fit, r) - } - return(out) - } - - post <- switch( param, all = params_all(fit), @@ -69,24 +50,21 @@ get_posterior <- function(fit = NULL, ) # use more informative names - param.names <- c( + param_names <- c( connectance = "rho", preference = "r", - plant.abund = "sigma", - animal.abund = "tau", - int.prob = "Q") + plant_abund = "sigma", + animal_abund = "tau", + int_prob = "Q") - post <- dplyr::rename(post, dplyr::any_of(param.names)) + post <- dplyr::rename(post, dplyr::any_of(param_names)) ## generate posteriors of link existence if (param == "all" | param == "link") { post <- is_there_link(post) } - - ## rename plants and animals with original labels - if ("Animal" %in% names(post)) { animals <- data.frame(Animal = 1:ncol(data$M), Animal.name = colnames(data$M)) post <- post |> @@ -102,6 +80,24 @@ get_posterior <- function(fit = NULL, } return(post) +} + +params_all <- function(fit) { + if (any(grepl("^r\\[[0-9]+\\]$", tidybayes::get_variables(fit)))) { + out <- tidybayes::spread_draws(fit, rho, r[Animal], sigma[Plant], tau[Animal], Q[Plant, Animal]) + } else { + out <- tidybayes::spread_draws(fit, rho, r, sigma[Plant], tau[Animal], Q[Plant, Animal]) + } + return(out) +} + +params_preference <- function(fit) { + if (any(grepl("^r\\[[0-9]+\\]$", tidybayes::get_variables(fit)))) { + out <- tidybayes::spread_draws(fit, r[Animal]) + } else { + out <- tidybayes::spread_draws(fit, r) + } + return(out) } diff --git a/R/is_there_link.R b/R/is_there_link.R index b8eacb1..6ebb9ba 100644 --- a/R/is_there_link.R +++ b/R/is_there_link.R @@ -9,8 +9,7 @@ #' is_there_link <- function(df = NULL) { - - stopifnot("int.prob" %in% names(df)) + stopifnot("int_prob" %in% names(df)) df |> - dplyr::mutate(link = stats::rbinom(1, size = 1, prob = int.prob)) + dplyr::mutate(link = stats::rbinom(1, size = 1, prob = int_prob)) } diff --git a/R/plot_counts_pred.R b/R/plot_counts_pred.R index 6f246fd..033e38f 100644 --- a/R/plot_counts_pred.R +++ b/R/plot_counts_pred.R @@ -1,7 +1,9 @@ #' Plot heatmap of predicted counts #' -#' @param pred.df A data frame containing the predicted counts, as generated by +#' @param pred_df A data frame containing the predicted counts, as generated by #' [predict_counts()] +#' @param .width probability cutoff to determine the width of the resulting interval (default 0.95) +#' @param .point point summary function, which takes a vector and returns a single value, e.g. [mean()], [median()], or [Mode()] #' @param ... Further arguments for [network.tools::plot_web_heatmap()]. #' #' @return A ggplot object @@ -9,17 +11,17 @@ #' #' @examplesIf interactive() #' data(web) -#' dt <- prepare_data(mat = web, sampl.eff = rep(20, nrow(web))) +#' dt <- prepare_data(mat = web, plant_effort = rep(20, nrow(web))) #' fit <- fit_model(dt, refresh = 0) -#' pred.df <- predict_counts(fit, dt) -#' plot_counts_pred(pred.df) -#' plot_counts_pred(pred.df, sort = FALSE) +#' pred_df <- predict_counts(fit, dt) +#' plot_counts_pred(pred_df) +#' plot_counts_pred(pred_df, sort = FALSE) -plot_counts_pred <- function(pred.df = NULL, ...) { +plot_counts_pred <- function(pred_df = NULL, .width = 0.95, .point = mean, ...) { pred.df |> dplyr::select(Plant, Animal, count) |> - tidybayes::mean_qi() |> + tidybayes::point_interval(.width = .width, .point = .point) |> network.tools::plot_web_heatmap(int.var = "count", ...) + ggplot2::labs(title = "Predicted counts", fill = "") } diff --git a/R/plot_counts_pred_obs.R b/R/plot_counts_pred_obs.R index bae0876..3042de7 100644 --- a/R/plot_counts_pred_obs.R +++ b/R/plot_counts_pred_obs.R @@ -1,11 +1,12 @@ #' Plot predicted versus observed counts #' -#' @param pred.df A data frame containing the predicted counts, as generated by +#' @param pred_df A data frame containing the predicted counts, as generated by #' `predict_counts()` #' @param data Data list (from `prepare_data()`) #' @param byplant Logical. If TRUE, show predicted and observed counts per plant #' (using `ggplot2::facet_wrap()`). If FALSE, show all interactions in the same plot. -#' @param width width of the credible interval (default = 0.95). +#' @param .width probability cutoff to determine the width of the resulting interval (default 0.95). +#' @param .point point summary function, which takes a vector and returns a single value, e.g. [mean()], [median()], or [Mode()] #' @param ... Further arguments to be passed to `ggplot2::facet_wrap()` if #' `byplant = TRUE`, or to `tidybayes::geom_pointinterval()` if `byplant = FALSE`. #' @@ -14,22 +15,23 @@ #' #' @examplesIf interactive() #' data(web) -#' dt <- prepare_data(mat = web, sampl.eff = rep(20, nrow(web))) +#' dt <- prepare_data(mat = web, plant_effort = rep(20, nrow(web))) #' fit <- fit_model(dt, refresh = 0) -#' pred.df <- predict_counts(fit, dt) -#' plot_counts_pred_obs(pred.df, dt) -#' plot_counts_pred_obs(pred.df, dt, fatten_point = 3) -#' plot_counts_pred_obs(pred.df, dt, byplant = TRUE) -#' plot_counts_pred_obs(pred.df, dt, byplant = TRUE, scale = "free") +#' pred_df <- predict_counts(fit, dt) +#' plot_counts_pred_obs(pred_df, dt) +#' plot_counts_pred_obs(pred_df, dt, fatten_point = 3) +#' plot_counts_pred_obs(pred_df, dt, byplant = TRUE) +#' plot_counts_pred_obs(pred_df, dt, byplant = TRUE, scale = "free") -plot_counts_pred_obs <- function(pred.df = NULL, data = NULL, byplant = FALSE, width = 0.95, ...) { +plot_counts_pred_obs <- function(pred_df = NULL, data = NULL, byplant = FALSE, .width = 0.95, + .point = mean, ...) { obs <- data$M |> network.tools::wide2long(int.name = "Count.obs") - pred <- pred.df |> + pred <- pred_df |> dplyr::select(Plant, Animal, Count.pred = count) |> - tidybayes::mean_qi(.width = width) |> + tidybayes::point_interval(.width = .width, .point = .point) |> dplyr::left_join(obs, by = c("Plant", "Animal")) gg <- ggplot2::ggplot(pred) + @@ -47,8 +49,5 @@ plot_counts_pred_obs <- function(pred.df = NULL, data = NULL, byplant = FALSE, w ymin = .lower, ymax = .upper), ...) } - return(gg) - - } diff --git a/R/plot_interaction_prob.R b/R/plot_interaction_prob.R index 990bc34..5a55a22 100644 --- a/R/plot_interaction_prob.R +++ b/R/plot_interaction_prob.R @@ -10,16 +10,16 @@ #' #' @examplesIf interactive() #' data(web) -#' dt <- prepare_data(mat = web, sampl.eff = rep(20, nrow(web))) +#' dt <- prepare_data(mat = web, plant_effort = rep(20, nrow(web))) #' fit <- fit_model(dt, refresh = 0) #' post <- get_posterior(fit, dt) #' plot_interaction_prob(post) plot_interaction_prob <- function(post = NULL) { post |> - dplyr::select(Plant, Animal, int.prob) |> + dplyr::select(Plant, Animal, int_prob) |> tidybayes::mean_qi() |> - network.tools::plot_web_heatmap(int.var = "int.prob", sort = FALSE) + + network.tools::plot_web_heatmap(int.var = "int_prob", sort = FALSE) + ggplot2::labs(title = "Mean interaction probability", fill = "Probability") } diff --git a/R/plot_prior.R b/R/plot_prior.R index bede4d6..ebb9b3d 100644 --- a/R/plot_prior.R +++ b/R/plot_prior.R @@ -20,7 +20,7 @@ #' #' ## Providing fitted model #' data(web) -#' dt <- prepare_data(mat = web, sampl.eff = rep(20, nrow(web))) +#' dt <- prepare_data(mat = web, plant_effort = rep(20, nrow(web))) #' fit <- fit_model(dt, refresh = 0) #' plot_prior(fit = fit, data = dt) @@ -66,14 +66,9 @@ plot_prior <- function(beta = NULL, fit = NULL, data = NULL) { ggplot2::geom_density(ggplot2::aes(preference), bounds = c(0, Inf), fill = "grey90", data = df.post, alpha = 0.5) - - } - } - return(gg) - } @@ -81,6 +76,6 @@ plot_prior <- function(beta = NULL, fit = NULL, data = NULL) { get_beta <- function(fit = NULL) { datafile <- readLines(fit$data_file()) # reading json beta.info <- datafile[grep("beta", datafile)] - beta <- as.numeric(gsub('.*beta\": ', "", beta.info)) + beta <- as.numeric(regmatches(beta.info, regexpr("\\d+(?:\\.\\d+)?(?:[eE][-+]?\\d+)?", beta.info))) return(beta) } diff --git a/R/plot_residuals.R b/R/plot_residuals.R index a47a4c5..d1e4dc4 100644 --- a/R/plot_residuals.R +++ b/R/plot_residuals.R @@ -1,8 +1,6 @@ -#' Plot heatmap of residuals -#' #' Plot heatmap of residuals (observed - predicted counts). #' -#' @param pred.df A data frame containing the predicted counts, as generated by +#' @param pred_df A data frame containing the predicted counts, as generated by #' [predict_counts()] #' @param data Data list (from `prepare_data()`) #' @param ... Further arguments for [network.tools::plot_web_heatmap()]. @@ -12,18 +10,18 @@ #' #' @examplesIf interactive() #' data(web) -#' dt <- prepare_data(mat = web, sampl.eff = rep(20, nrow(web))) +#' dt <- prepare_data(mat = web, plant_effort = rep(20, nrow(web))) #' fit <- fit_model(dt, refresh = 0) -#' pred.df <- predict_counts(fit, dt) -#' plot_residuals(pred.df, dt) -#' plot_residuals(pred.df, dt, sort = FALSE) +#' pred_df <- predict_counts(fit, dt) +#' plot_residuals(pred_df, dt) +#' plot_residuals(pred_df, dt, sort = FALSE) -plot_residuals <- function(pred.df = NULL, data = NULL, ...) { +plot_residuals <- function(pred_df = NULL, data = NULL, ...) { obs <- data$M |> network.tools::wide2long() - pred <- pred.df |> + pred <- pred_df |> dplyr::select(Plant, Animal, count) |> tidybayes::mean_qi() |> dplyr::select(Plant, Animal, count) |> diff --git a/R/predict_counts.R b/R/predict_counts.R index be8b819..40a32cc 100644 --- a/R/predict_counts.R +++ b/R/predict_counts.R @@ -11,24 +11,21 @@ #' #' @examplesIf interactive() #' data(web) -#' dt <- prepare_data(mat = web, sampl.eff = rep(20, nrow(web))) +#' dt <- prepare_data(mat = web, plant_effort = rep(20, nrow(web))) #' fit <- fit_model(dt, refresh = 0) #' predict_counts(fit, dt) predict_counts <- function(fit = NULL, data = NULL) { - ## generate posteriors post <- get_posterior(fit, data, param = "all") ## add sampling effort per plant - C <- data.frame(Plant = rownames(data$M), effort = data$C) + C <- data.frame(Plant = rownames(data$M), plant_effort = data$C) post <- dplyr::left_join(post, C, by = "Plant") ## calculate predicted counts post <- dplyr::mutate(post, - count = (1 - int.prob) * effort * plant.abund * animal.abund + - int.prob * effort * plant.abund * animal.abund * (1 + preference)) - + count = (1 - int_prob) * plant_effort * plant_abund * animal_abund + + int_prob * plant_effort * plant_abund * animal_abund * (1 + preference)) return(post) - } diff --git a/man/check_model.Rd b/man/check_model.Rd index 45c18c8..5b35346 100644 --- a/man/check_model.Rd +++ b/man/check_model.Rd @@ -20,7 +20,7 @@ Check fitted model \examples{ \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(web) -dt <- prepare_data(mat = web, sampl.eff = rep(20, nrow(web))) +dt <- prepare_data(mat = web, plant_effort = rep(20, nrow(web))) fit <- fit_model(dt, refresh = 0) check_model(fit, data = data) \dontshow{\}) # examplesIf} diff --git a/man/get_posterior.Rd b/man/get_posterior.Rd index 1a98199..ec2c054 100644 --- a/man/get_posterior.Rd +++ b/man/get_posterior.Rd @@ -27,7 +27,7 @@ Get posterior values \examples{ \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(web) -dt <- prepare_data(mat = web, sampl.eff = rep(20, nrow(web))) +dt <- prepare_data(mat = web, plant_effort = rep(20, nrow(web))) fit <- fit_model(dt, refresh = 0) get_posterior(fit, dt, param = "connectance") diff --git a/man/plot_counts_pred.Rd b/man/plot_counts_pred.Rd index 17f4947..98e149a 100644 --- a/man/plot_counts_pred.Rd +++ b/man/plot_counts_pred.Rd @@ -4,12 +4,16 @@ \alias{plot_counts_pred} \title{Plot heatmap of predicted counts} \usage{ -plot_counts_pred(pred.df = NULL, ...) +plot_counts_pred(pred_df = NULL, .width = 0.95, .point = mean, ...) } \arguments{ -\item{pred.df}{A data frame containing the predicted counts, as generated by +\item{pred_df}{A data frame containing the predicted counts, as generated by \code{\link[=predict_counts]{predict_counts()}}} +\item{.width}{probability cutoff to determine the width of the resulting interval (default 0.95)} + +\item{.point}{point summary function, which takes a vector and returns a single value, e.g. \code{\link[=mean]{mean()}}, \code{\link[=median]{median()}}, or \code{\link[=Mode]{Mode()}}} + \item{...}{Further arguments for \code{\link[network.tools:plot_web_heatmap]{network.tools::plot_web_heatmap()}}.} } \value{ @@ -21,10 +25,10 @@ Plot heatmap of predicted counts \examples{ \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(web) -dt <- prepare_data(mat = web, sampl.eff = rep(20, nrow(web))) +dt <- prepare_data(mat = web, plant_effort = rep(20, nrow(web))) fit <- fit_model(dt, refresh = 0) -pred.df <- predict_counts(fit, dt) -plot_counts_pred(pred.df) -plot_counts_pred(pred.df, sort = FALSE) +pred_df <- predict_counts(fit, dt) +plot_counts_pred(pred_df) +plot_counts_pred(pred_df, sort = FALSE) \dontshow{\}) # examplesIf} } diff --git a/man/plot_counts_pred_obs.Rd b/man/plot_counts_pred_obs.Rd index c268fde..60282a3 100644 --- a/man/plot_counts_pred_obs.Rd +++ b/man/plot_counts_pred_obs.Rd @@ -5,15 +5,16 @@ \title{Plot predicted versus observed counts} \usage{ plot_counts_pred_obs( - pred.df = NULL, + pred_df = NULL, data = NULL, byplant = FALSE, - width = 0.95, + .width = 0.95, + .point = mean, ... ) } \arguments{ -\item{pred.df}{A data frame containing the predicted counts, as generated by +\item{pred_df}{A data frame containing the predicted counts, as generated by \code{predict_counts()}} \item{data}{Data list (from \code{prepare_data()})} @@ -21,7 +22,9 @@ plot_counts_pred_obs( \item{byplant}{Logical. If TRUE, show predicted and observed counts per plant (using \code{ggplot2::facet_wrap()}). If FALSE, show all interactions in the same plot.} -\item{width}{width of the credible interval (default = 0.95).} +\item{.width}{probability cutoff to determine the width of the resulting interval (default 0.95).} + +\item{.point}{point summary function, which takes a vector and returns a single value, e.g. \code{\link[=mean]{mean()}}, \code{\link[=median]{median()}}, or \code{\link[=Mode]{Mode()}}} \item{...}{Further arguments to be passed to \code{ggplot2::facet_wrap()} if \code{byplant = TRUE}, or to \code{tidybayes::geom_pointinterval()} if \code{byplant = FALSE}.} @@ -35,12 +38,12 @@ Plot predicted versus observed counts \examples{ \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(web) -dt <- prepare_data(mat = web, sampl.eff = rep(20, nrow(web))) +dt <- prepare_data(mat = web, plant_effort = rep(20, nrow(web))) fit <- fit_model(dt, refresh = 0) -pred.df <- predict_counts(fit, dt) -plot_counts_pred_obs(pred.df, dt) -plot_counts_pred_obs(pred.df, dt, fatten_point = 3) -plot_counts_pred_obs(pred.df, dt, byplant = TRUE) -plot_counts_pred_obs(pred.df, dt, byplant = TRUE, scale = "free") +pred_df <- predict_counts(fit, dt) +plot_counts_pred_obs(pred_df, dt) +plot_counts_pred_obs(pred_df, dt, fatten_point = 3) +plot_counts_pred_obs(pred_df, dt, byplant = TRUE) +plot_counts_pred_obs(pred_df, dt, byplant = TRUE, scale = "free") \dontshow{\}) # examplesIf} } diff --git a/man/plot_interaction_prob.Rd b/man/plot_interaction_prob.Rd index 1bf11cf..71130d7 100644 --- a/man/plot_interaction_prob.Rd +++ b/man/plot_interaction_prob.Rd @@ -19,7 +19,7 @@ Plot a heatmap of average interaction probabilities \examples{ \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(web) -dt <- prepare_data(mat = web, sampl.eff = rep(20, nrow(web))) +dt <- prepare_data(mat = web, plant_effort = rep(20, nrow(web))) fit <- fit_model(dt, refresh = 0) post <- get_posterior(fit, dt) plot_interaction_prob(post) diff --git a/man/plot_prior.Rd b/man/plot_prior.Rd index 88e9c91..8f19bfe 100644 --- a/man/plot_prior.Rd +++ b/man/plot_prior.Rd @@ -31,7 +31,7 @@ plot_prior(beta = 0.001) ## Providing fitted model data(web) -dt <- prepare_data(mat = web, sampl.eff = rep(20, nrow(web))) +dt <- prepare_data(mat = web, plant_effort = rep(20, nrow(web))) fit <- fit_model(dt, refresh = 0) plot_prior(fit = fit, data = dt) \dontshow{\}) # examplesIf} diff --git a/man/plot_residuals.Rd b/man/plot_residuals.Rd index 7faafed..46133f2 100644 --- a/man/plot_residuals.Rd +++ b/man/plot_residuals.Rd @@ -2,12 +2,12 @@ % Please edit documentation in R/plot_residuals.R \name{plot_residuals} \alias{plot_residuals} -\title{Plot heatmap of residuals} +\title{Plot heatmap of residuals (observed - predicted counts).} \usage{ -plot_residuals(pred.df = NULL, data = NULL, ...) +plot_residuals(pred_df = NULL, data = NULL, ...) } \arguments{ -\item{pred.df}{A data frame containing the predicted counts, as generated by +\item{pred_df}{A data frame containing the predicted counts, as generated by \code{\link[=predict_counts]{predict_counts()}}} \item{data}{Data list (from \code{prepare_data()})} @@ -23,10 +23,10 @@ Plot heatmap of residuals (observed - predicted counts). \examples{ \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(web) -dt <- prepare_data(mat = web, sampl.eff = rep(20, nrow(web))) +dt <- prepare_data(mat = web, plant_effort = rep(20, nrow(web))) fit <- fit_model(dt, refresh = 0) -pred.df <- predict_counts(fit, dt) -plot_residuals(pred.df, dt) -plot_residuals(pred.df, dt, sort = FALSE) +pred_df <- predict_counts(fit, dt) +plot_residuals(pred_df, dt) +plot_residuals(pred_df, dt, sort = FALSE) \dontshow{\}) # examplesIf} } diff --git a/man/predict_counts.Rd b/man/predict_counts.Rd index 8ae2550..1604007 100644 --- a/man/predict_counts.Rd +++ b/man/predict_counts.Rd @@ -21,7 +21,7 @@ interaction. \examples{ \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(web) -dt <- prepare_data(mat = web, sampl.eff = rep(20, nrow(web))) +dt <- prepare_data(mat = web, plant_effort = rep(20, nrow(web))) fit <- fit_model(dt, refresh = 0) predict_counts(fit, dt) \dontshow{\}) # examplesIf} From e34269c54ba5c119769122e9e31cb12dddc03f8d Mon Sep 17 00:00:00 2001 From: wpetry Date: Mon, 10 Feb 2025 17:36:04 -0500 Subject: [PATCH 05/17] more robust checks and handling for full range of data+model scenarios --- R/fit_model.R | 29 ++++++++++++++--------- R/prepare_data.R | 57 +++++++++++++++++++++++++-------------------- R/stancode.R | 1 + man/fit_model.Rd | 2 +- man/prepare_data.Rd | 16 ++++++++----- man/stancode.Rd | 24 ------------------- 6 files changed, 62 insertions(+), 67 deletions(-) delete mode 100644 man/stancode.Rd diff --git a/R/fit_model.R b/R/fit_model.R index 0ddf5be..8aa4135 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -21,7 +21,7 @@ #' #' @examplesIf interactive() #' data(web) -#' dt <- prepare_data(mat = web, sampl.eff = rep(20, nrow(web))) +#' dt <- prepare_data(mat = web, plant_effort = rep(20, nrow(web))) #' # fit the model used by Young et al. 2021 #' fit <- fit_model(dt, plant_effort = "param", animal_pref = "fixed", #' plant_abun = "estimate", animal_abun = "estimate") @@ -44,18 +44,25 @@ fit_model <- function(data = NULL, animal_pref <- match.arg(animal_pref) plant_abun <- match.arg(plant_abun) animal_abun <- match.arg(animal_abun) - vars <- names(data) ## Check that data provided are suitable for the specified model variant - if(plant_effort == "data" & !"C" %in% vars) stop('Sampling effort, C, must be supplied in data when plant_effort = "data"') - if(plant_abun == "relative" & !"sigma" %in% vars) stop('Plant relative abundances, sigma, must be supplied in data when plant_abun = "relative"') - if(plant_abun == "absolute" & !"sigma" %in% vars) stop('Plant absolute abundances, sigma, must be supplied in data when plant_abun = "absolute"') - if(animal_abun == "relative" & !"tau" %in% vars) stop('Animal relative abundances, tau, must be supplied in data when animal_abun = "relative"') - if(animal_abun == "absolute" & !"tau" %in% vars) stop('Animal absolute abundances, tau, must be supplied in data when animal_abun = "absolute"') - if(plant_abun == "relative" & !all.equal(sum(data$sigma), 1)) stop("Plant relative abundances, sigma, must sum to 1.") - if(animal_abun == "relative" & !all.equal(sum(data$tau), 1)) stop("Animal relative abundances, tau, must sum to 1.") - if(plant_abun == "absolute" & all.equal(sum(data$sigma), 1)) warning("Plant absolute abundances, sigma, appear to be relative abundances.") - if(animal_abun == "absolute" & all.equal(sum(data$tau), 1)) warning("Animal absolute abundances, tau, appear to be relative abundances.") + if(plant_effort == "data" & is.null(data$C)) stop('Sampling effort, C, must be supplied in data when plant_effort = "data"') + if(plant_abun == "relative") { + if(is.null(data$sigma)) stop('Plant relative abundances, sigma, must be supplied in data when plant_abun = "relative"') + if(!all.equal(sum(data$sigma), 1)) stop("Plant relative abundances, sigma, must sum to 1.") + } + if(animal_abun == "relative") { + if(is.null(data$tau)) stop('Animal relative abundances, tau, must be supplied in data when animal_abun = "relative"') + if(!all.equal(sum(data$tau), 1)) stop("Animal relative abundances, tau, must sum to 1.") + } + if(plant_abun == "absolute") { + if(is.null(data$sigma)) stop('Plant absolute abundances, sigma, must be supplied in data when plant_abun = "absolute"') + if(all.equal(sum(data$sigma), 1)) warning("Plant absolute abundances, sigma, appear to be relative abundances.") + } + if(animal_abun == "absolute") { + if(is.null(data$tau)) stop('Animal relative abundances, tau, must be supplied in data when animal_abun = "absolute"') + if(all.equal(sum(data$tau), 1)) warning("Animal absolute abundances, tau, appear to be relative abundances.") + } stopifnot(beta > 0, alpha_sigma > 0, alpha_tau > 0) ## Add priors to data list diff --git a/R/prepare_data.R b/R/prepare_data.R index 56da6d5..d48f760 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -1,43 +1,50 @@ #' Prepare the data for modelling #' -#' @param mat An integer matrix containing quantitative (not qualitative or binary) -#' count data con interaction frequency (e.g. visits to flowers, number of fruits +#' @param mat A matrix containing quantitative counts (not qualitative or binary) +#' of observed interactions (e.g. visits to flowers, number of fruits #' consumed per plant or species). Plants must be in rows, Animals must be in columns. -#' @param sampl.eff A numeric vector with the sampling effort (e.g. observation hours) +#' @param plant_effort A numeric vector with the sampling effort (e.g. observation hours) #' spent on each plant. +#' @param plant_abun (optional) A numeric vector with the relative or absolute abundances of each plant. +#' @param animal_abun (optional) A numeric vector with the relative or absolute abundances of each animal. #' -#' @return A named list with all the data required to run the model. +#' @return A named list containing data to be passed to the bipartite network model. #' @export #' #' @examples #' data(web) -#' prepare_data(web, sampl.eff = rep(20, nrow(web))) - -prepare_data <- function(mat = NULL, sampl.eff = NULL) { +#' prepare_data(web, plant_effort = rep(20, nrow(web))) +prepare_data <- function(mat, plant_effort = NULL, plant_abun = NULL, animal_abun = NULL) { ## Checks - stopifnot(is.matrix(mat)) - stopifnot(is.integer(mat)) - if (min(mat) < 0) { - stop("There cannot be negative counts") + if (!is.integer(mat) & all(mat == as.integer(mat))) stop("Elements of 'mat' must be integers.") + if (min(mat) < 0) stop("Elements of 'mat' must be >=0.") + if (max(mat) == 1) warning("'mat' should be a quantitative (not qualitative/binary) matrix with count data. Are you sure the maximum observed count is 1?") + if (!is.null(plant_effort)) { + stopifnot(length(plant_effort) == nrow(mat)) + stopifnot(is.numeric(plant_effort)) + if (max(plant_effort) > 100) warning("Large values for `plant_effort` may interfere with sampling efficiency and model convergence. Consider rescaling to different units (e.g. hours instead of minutes) to obtain values <100.") } - if (max(mat) == 1) { - warning("mat must be a quantitative (not qualitative/binary) matrix with count data. Are you sure the maximum observed count is 1?") + if (!is.null(plant_abun)) { + stopifnot(length(plant_abun) == nrow(mat)) + stopifnot(is.numeric(plant_abun)) + stopifnot(all(plant_abun > 0)) } - - stopifnot(length(sampl.eff) == nrow(mat)) - stopifnot(is.numeric(sampl.eff)) - if (max(sampl.eff) > 100) { - warning("sampl.eff takes large values, which could hinder model convergence. Consider using different units (e.g. hours instead of minutes) to obtain smaller numbers") + if (!is.null(animal_abun)) { + stopifnot(length(animal_abun) == ncol(mat)) + stopifnot(is.numeric(animal_abun)) + stopifnot(all(animal_abun > 0)) } - ## - - ## Data list - dt <- list(M = mat, n_p = nrow(mat), n_a = ncol(mat), C = sampl.eff) - # dt <- tidybayes::compose_data(M = mat, n_p = nrow(mat), n_a = ncol(mat), C = sampl.eff) - - dt + ## Ensure interaction counts are stored as integers + p <- rownames(mat) + a <- colnames(mat) + mat <- matrix(as.integer(mat), nrow = nrow(mat), ncol = ncol(mat), dimnames = list(p, a)) + ## Prepare data list, dropping NULL elements + dt <- list(M = mat, n_p = nrow(mat), n_a = ncol(mat), C = plant_effort, + sigma = plant_abun, tau = animal_abun) + dt <- Filter(Negate(is.null), dt) + return(dt) } diff --git a/R/stancode.R b/R/stancode.R index 4128530..9e149d6 100644 --- a/R/stancode.R +++ b/R/stancode.R @@ -2,6 +2,7 @@ #' #' @returns A string containing a Stan model. #' @author William K. Petry +#' @noRd #' @keywords internal #' @export #' diff --git a/man/fit_model.Rd b/man/fit_model.Rd index a024dfa..c090093 100644 --- a/man/fit_model.Rd +++ b/man/fit_model.Rd @@ -49,7 +49,7 @@ This function allows fits generalized variations of the bipartite network struct \examples{ \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(web) -dt <- prepare_data(mat = web, sampl.eff = rep(20, nrow(web))) +dt <- prepare_data(mat = web, plant_effort = rep(20, nrow(web))) # fit the model used by Young et al. 2021 fit <- fit_model(dt, plant_effort = "param", animal_pref = "fixed", plant_abun = "estimate", animal_abun = "estimate") diff --git a/man/prepare_data.Rd b/man/prepare_data.Rd index 3ce68db..f115570 100644 --- a/man/prepare_data.Rd +++ b/man/prepare_data.Rd @@ -4,23 +4,27 @@ \alias{prepare_data} \title{Prepare the data for modelling} \usage{ -prepare_data(mat = NULL, sampl.eff = NULL) +prepare_data(mat, plant_effort = NULL, plant_abun = NULL, animal_abun = NULL) } \arguments{ -\item{mat}{An integer matrix containing quantitative (not qualitative or binary) -count data con interaction frequency (e.g. visits to flowers, number of fruits +\item{mat}{A matrix containing quantitative counts (not qualitative or binary) +of observed interactions (e.g. visits to flowers, number of fruits consumed per plant or species). Plants must be in rows, Animals must be in columns.} -\item{sampl.eff}{A numeric vector with the sampling effort (e.g. observation hours) +\item{plant_effort}{A numeric vector with the sampling effort (e.g. observation hours) spent on each plant.} + +\item{plant_abun}{(optional) A numeric vector with the relative or absolute abundances of each plant.} + +\item{animal_abun}{(optional) A numeric vector with the relative or absolute abundances of each animal.} } \value{ -A named list with all the data required to run the model. +A named list containing data to be passed to the bipartite network model. } \description{ Prepare the data for modelling } \examples{ data(web) -prepare_data(web, sampl.eff = rep(20, nrow(web))) +prepare_data(web, plant_effort = rep(20, nrow(web))) } diff --git a/man/stancode.Rd b/man/stancode.Rd deleted file mode 100644 index a59e5a7..0000000 --- a/man/stancode.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/stancode.R -\name{stancode} -\alias{stancode} -\title{Make Stan code} -\usage{ -stancode( - plant_effort = c("param", "data"), - animal_pref = c("fixed", "varying"), - plant_abun = c("estimate", "relative", "absolute"), - animal_abun = c("estimate", "relative", "absolute") -) -} -\value{ -A string containing a Stan model. -} -\description{ -Make Stan code -} -\details{ -This function is designed to be used internally. See \code{fit_model} for documentation -of arguments. -} -\keyword{internal} From c2c86dbe7606478bdd2e88ba66938f4cbc5545fb Mon Sep 17 00:00:00 2001 From: wpetry Date: Mon, 10 Feb 2025 21:31:21 -0500 Subject: [PATCH 06/17] refactor to use rlang --- DESCRIPTION | 6 ++++- R/get_posterior.R | 58 ++++++++++++++++++++++------------------------- 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 46b1f38..5489d58 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -4,7 +4,9 @@ Version: 0.0.7 Authors@R: c( person("Francisco", "Rodriguez-Sanchez", , "f.rodriguez.sanc@gmail.com", role = c("aut", "cre"), - comment = c(ORCID = "0000-0002-7981-1599")) + comment = c(ORCID = "0000-0002-7981-1599")), + person("William K.", "Petry", , "wpetry@ncsu.edu", role = "ctb", + comment = c(ORCID = "0000-0002-5230-5987")) ) Description: Bayesian modelling of bipartite network structure, following the approach of Young et al. . @@ -20,6 +22,8 @@ Imports: ggplot2, network.tools, tidybayes +Suggests: + rlang Remotes: stan-dev/cmdstanr, Pakillo/network.tools diff --git a/R/get_posterior.R b/R/get_posterior.R index 3306c78..f170d46 100644 --- a/R/get_posterior.R +++ b/R/get_posterior.R @@ -5,6 +5,7 @@ #' @param param character. Name of the parameter to retrieve the posterior samples. #' #' @return A data frame +#' @importFrom rlang parse_exprs !!! #' @export #' #' @examplesIf interactive() @@ -29,22 +30,37 @@ get_posterior <- function(fit = NULL, "animal.abund", "int.prob", "link")) { - - # r = avg. visits from mutualists (preference) - # rho = connectance - # sigma = plant_abund - # tau = animal_abund - # Q = interaction probability - + ## parse arguments & construct parameter set to extract param <- match.arg(param) + all_vars <- tidybayes::get_variables(fit) + if (sum(grepl("^r(\\[\\d+\\])?$", all_vars)) > 1) { + sel_r <- "r[Animal]" + } else { + sel_r <- "r" + } + if (sum(grepl("^sigma", all_vars)) > 1) { + sel_sigma <- "sigma[Plant]" + } else if (sum(grepl("^sigma", all_vars)) == 1) { + sel_sigma <- "sigma" + } else { + sel_sigma <- NULL + } + if (sum(grepl("^tau", all_vars)) > 1) { + sel_tau <- "tau[Animal]" + } else if (sum(grepl("^tau", all_vars)) == 1) { + sel_tau <- "tau" + } else { + sel_tau <- NULL + } post <- switch( param, - all = params_all(fit), + all = tidybayes::spread_draws(fit, !!!rlang::parse_exprs(c("rho", sel_r, sel_sigma, + sel_tau, "Q[Plant, Animal]"))), connectance = tidybayes::spread_draws(fit, rho), - preference = params_preference(fit), - plant.abund = tidybayes::spread_draws(fit, sigma[Plant]), - animal.abund = tidybayes::spread_draws(fit, tau[Animal]), + preference = tidybayes::spread_draws(fit, !!!rlang::parse_exprs(sel_r)), + plant.abund = tidybayes::spread_draws(fit, !!!rlang::parse_exprs(sel_sigma)), + animal.abund = tidybayes::spread_draws(fit, !!!rlang::parse_exprs(sel_tau)), int.prob = tidybayes::spread_draws(fit, Q[Plant, Animal]), link = tidybayes::spread_draws(fit, Q[Plant, Animal]), ) @@ -81,23 +97,3 @@ get_posterior <- function(fit = NULL, return(post) } - - - -params_all <- function(fit) { - if (any(grepl("^r\\[[0-9]+\\]$", tidybayes::get_variables(fit)))) { - out <- tidybayes::spread_draws(fit, rho, r[Animal], sigma[Plant], tau[Animal], Q[Plant, Animal]) - } else { - out <- tidybayes::spread_draws(fit, rho, r, sigma[Plant], tau[Animal], Q[Plant, Animal]) - } - return(out) -} - -params_preference <- function(fit) { - if (any(grepl("^r\\[[0-9]+\\]$", tidybayes::get_variables(fit)))) { - out <- tidybayes::spread_draws(fit, r[Animal]) - } else { - out <- tidybayes::spread_draws(fit, r) - } - return(out) -} From 910c8b179bd880cc336e531db17bc769fc50b588 Mon Sep 17 00:00:00 2001 From: wpetry Date: Mon, 10 Feb 2025 21:59:50 -0500 Subject: [PATCH 07/17] snake_case straggler --- R/globals.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/globals.R b/R/globals.R index 108091f..9003c6d 100644 --- a/R/globals.R +++ b/R/globals.R @@ -1,5 +1,5 @@ utils::globalVariables(c(".chain", ".draw", ".lower", ".upper", "Animal", "Count.obs", "Count.pred", "Plant", "Q", - "Visits", "animal.abund", "count", "effort", - "int.prob", "lp__", "plant.abund", "preference", + "Visits", "animal_abund", "count", "effort", + "int_prob", "lp__", "plant_abund", "preference", "prior", "r", "rho", "sigma", "tau")) From 5786f200d9c8b9fff0d74a19bf361df8dfbaa357 Mon Sep 17 00:00:00 2001 From: wpetry Date: Tue, 11 Feb 2025 18:46:33 -0500 Subject: [PATCH 08/17] rm extraneous abundance options; clean up --- NAMESPACE | 2 ++ R/fit_model.R | 38 ++++++++++---------------- R/globals.R | 2 +- R/plot_counts_pred.R | 2 +- R/plot_counts_pred_obs.R | 8 +++--- R/prepare_data.R | 6 ++--- R/stancode.R | 53 ++++++++++++++++++------------------- man/BayesianWebs-package.Rd | 5 ++++ man/fit_model.Rd | 20 +++++++------- man/prepare_data.Rd | 4 +-- 10 files changed, 68 insertions(+), 72 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 2139eb7..9ff6f22 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,3 +14,5 @@ export(predict_counts) export(prepare_data) export(stancode) import(cmdstanr) +importFrom(rlang,"!!!") +importFrom(rlang,parse_exprs) diff --git a/R/fit_model.R b/R/fit_model.R index 8aa4135..a5b9918 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -2,13 +2,13 @@ #' #' @param data A named list containing the required data, as obtained from #' [prepare_data()]. -#' @param plant_effort The sampling effort allocated to each plant. If unknown, `param` (default) estimates a single parameter during the modelling step that applies to all plants. Alternatively, "data" allows measured values of sampling effort to be supplied in `data`. -#' @param animal_pref Controls the relative preference of animals for linked plants. `fixed` (default), fits a single preference parameter for all animals during the modelling step. `varying` fits a separate preference parameter for each animal type. -#' @param plant_abun Controls how the model accounts for plant abundances. `estimate` (default) estimates the latent relative abundance of each plant type. `relative` allows the user to pass a simplex containing the relative abundances of each plant. `absolute` allows the user to supply the absolute abundances of each plant. -#' @param animal_abun Controls how the model accounts for animal abundances. "estimate" (default) estimates the latent relative abundance of each animal type. "relative" allows the user to pass a simplex containing the relative abundances of each animal. `absolute` allows the user to supply the absolute abundances of each animal. -#' @param beta Rate parameter of exponential prior on `r` (preference) parameter. The defaults, 0.01, is suitable for small counts and should be increased for larger counts. Use `plot_prior` to visualize. -#' @param alpha_sigma Concentration parameter of Dirichlet prior on `sigma` (relative abundances of plants). Only used when `plant_abun = "estimate"`. Defaults to 1. Use `plot_prior` to visualize. -#' @param alpha_tau Concentration parameter of Dirichlet prior on `tau` (relative abundances of animals). Only used when `animal_abun = "estimate"`. Defaults to 1. Use `plot_prior` to visualize. +#' @param plant_effort How to model the sampling effort allocated to each plant. If unknown, `param` (default) estimates a single parameter during the modelling step that applies to all plants, \eqn{C}. Alternatively, "data" allows measured values of sampling effort to be supplied in `data`, \eqn{C_i}. +#' @param animal_pref How to model the increase in visits of animals to plants with which they share a link relative to unlinked plants. `fixed` (default), fits a single preference parameter for all animals during the modelling step, \eqn{r}. `varying` fits a separate preference parameter for each animal type, \eqn{r_i}. +#' @param plant_abun How the model treats the abundances of interacting plants. `param` (default) treats these abundances as a parameter that the model estimates, \eqn{sigma_i}. `data` treats these abundances as measured and requires the user to supply the absolute abundances of each plant in `data`. +#' @param animal_abun How the model treats the abundances of interacting animals. `param` (default) treats these abundances as a parameter that the model estimates, \eqn{sigma_i}. `data` treats these abundances as measured and requires the user to supply the absolute abundances of each animal in `data`. +#' @param beta Rate parameter of exponential prior on the preference parameter or parameters \eqn{r} or \eqn{r_i}. The default, 0.01, is suitable for small counts and should be increased for larger counts. Use `plot_prior` to visualize. +#' @param alpha_sigma Concentration parameters of Dirichlet prior on the abundances of interacting plants, \eqn{sigma_i}, when `plant_abun = "param"`. Ignored otherwise. Defaults to 1. +#' @param alpha_tau Concentration parameters of Dirichlet prior on the abundances of interacting plants, \eqn{tau_i}, when `animal_abun = "param"`. Ignored otherwise. Defaults to 1. #' @param ... Further arguments passed to [cmdstanr::sample()], like `iter_warmup`, #' `iter_sampling`, or `thin`, among others. #' @@ -24,13 +24,13 @@ #' dt <- prepare_data(mat = web, plant_effort = rep(20, nrow(web))) #' # fit the model used by Young et al. 2021 #' fit <- fit_model(dt, plant_effort = "param", animal_pref = "fixed", -#' plant_abun = "estimate", animal_abun = "estimate") +#' plant_abun = "param", animal_abun = "param") fit_model <- function(data = NULL, plant_effort = c("param", "data"), animal_pref = c("fixed", "varying"), - plant_abun = c("estimate", "relative", "absolute"), - animal_abun = c("estimate", "relative", "absolute"), + plant_abun = c("param", "data"), + animal_abun = c("param", "data"), beta = 0.01, alpha_sigma = 1, alpha_tau = 1, @@ -47,21 +47,11 @@ fit_model <- function(data = NULL, ## Check that data provided are suitable for the specified model variant if(plant_effort == "data" & is.null(data$C)) stop('Sampling effort, C, must be supplied in data when plant_effort = "data"') - if(plant_abun == "relative") { - if(is.null(data$sigma)) stop('Plant relative abundances, sigma, must be supplied in data when plant_abun = "relative"') - if(!all.equal(sum(data$sigma), 1)) stop("Plant relative abundances, sigma, must sum to 1.") + if(plant_abun == "data") { + if(is.null(data$abun_p)) stop('Plant abundances, abun_p, must be supplied in `data` when plant_abun = "data"') } - if(animal_abun == "relative") { - if(is.null(data$tau)) stop('Animal relative abundances, tau, must be supplied in data when animal_abun = "relative"') - if(!all.equal(sum(data$tau), 1)) stop("Animal relative abundances, tau, must sum to 1.") - } - if(plant_abun == "absolute") { - if(is.null(data$sigma)) stop('Plant absolute abundances, sigma, must be supplied in data when plant_abun = "absolute"') - if(all.equal(sum(data$sigma), 1)) warning("Plant absolute abundances, sigma, appear to be relative abundances.") - } - if(animal_abun == "absolute") { - if(is.null(data$tau)) stop('Animal relative abundances, tau, must be supplied in data when animal_abun = "absolute"') - if(all.equal(sum(data$tau), 1)) warning("Animal absolute abundances, tau, appear to be relative abundances.") + if(animal_abun == "data") { + if(is.null(data$abun_a)) stop('Animal abundances, abun_a, must be supplied in `data` when animal_abun = "data"') } stopifnot(beta > 0, alpha_sigma > 0, alpha_tau > 0) diff --git a/R/globals.R b/R/globals.R index 9003c6d..657acd9 100644 --- a/R/globals.R +++ b/R/globals.R @@ -1,5 +1,5 @@ utils::globalVariables(c(".chain", ".draw", ".lower", ".upper", - "Animal", "Count.obs", "Count.pred", "Plant", "Q", + "Animal", "Count_obs", "Count_pred", "Plant", "Q", "Visits", "animal_abund", "count", "effort", "int_prob", "lp__", "plant_abund", "preference", "prior", "r", "rho", "sigma", "tau")) diff --git a/R/plot_counts_pred.R b/R/plot_counts_pred.R index 033e38f..028c607 100644 --- a/R/plot_counts_pred.R +++ b/R/plot_counts_pred.R @@ -19,7 +19,7 @@ plot_counts_pred <- function(pred_df = NULL, .width = 0.95, .point = mean, ...) { - pred.df |> + pred_df |> dplyr::select(Plant, Animal, count) |> tidybayes::point_interval(.width = .width, .point = .point) |> network.tools::plot_web_heatmap(int.var = "count", ...) + diff --git a/R/plot_counts_pred_obs.R b/R/plot_counts_pred_obs.R index 3042de7..62d50fb 100644 --- a/R/plot_counts_pred_obs.R +++ b/R/plot_counts_pred_obs.R @@ -27,10 +27,10 @@ plot_counts_pred_obs <- function(pred_df = NULL, data = NULL, byplant = FALSE, . .point = mean, ...) { obs <- data$M |> - network.tools::wide2long(int.name = "Count.obs") + network.tools::wide2long(int.name = "Count_obs") pred <- pred_df |> - dplyr::select(Plant, Animal, Count.pred = count) |> + dplyr::select(Plant, Animal, Count_pred = count) |> tidybayes::point_interval(.width = .width, .point = .point) |> dplyr::left_join(obs, by = c("Plant", "Animal")) @@ -40,12 +40,12 @@ plot_counts_pred_obs <- function(pred_df = NULL, data = NULL, byplant = FALSE, . if (isTRUE(byplant)) { gg <- gg + - tidybayes::geom_pointinterval(ggplot2::aes(x = Count.obs, y = Count.pred, + tidybayes::geom_pointinterval(ggplot2::aes(x = Count_obs, y = Count_pred, ymin = .lower, ymax = .upper)) + ggplot2::facet_wrap(~Plant, ...) } else { gg <- gg + - tidybayes::geom_pointinterval(ggplot2::aes(x = Count.obs, y = Count.pred, + tidybayes::geom_pointinterval(ggplot2::aes(x = Count_obs, y = Count_pred, ymin = .lower, ymax = .upper), ...) } diff --git a/R/prepare_data.R b/R/prepare_data.R index d48f760..ce2455a 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -5,8 +5,8 @@ #' consumed per plant or species). Plants must be in rows, Animals must be in columns. #' @param plant_effort A numeric vector with the sampling effort (e.g. observation hours) #' spent on each plant. -#' @param plant_abun (optional) A numeric vector with the relative or absolute abundances of each plant. -#' @param animal_abun (optional) A numeric vector with the relative or absolute abundances of each animal. +#' @param plant_abun (optional) A numeric vector with the relative or absolute interacting abundances of each plant. +#' @param animal_abun (optional) A numeric vector with the relative or absolute interacting abundances of each animal. #' #' @return A named list containing data to be passed to the bipartite network model. #' @export @@ -44,7 +44,7 @@ prepare_data <- function(mat, plant_effort = NULL, plant_abun = NULL, animal_abu ## Prepare data list, dropping NULL elements dt <- list(M = mat, n_p = nrow(mat), n_a = ncol(mat), C = plant_effort, - sigma = plant_abun, tau = animal_abun) + abs_sigma = plant_abun, abs_tau = animal_abun) dt <- Filter(Negate(is.null), dt) return(dt) } diff --git a/R/stancode.R b/R/stancode.R index 9e149d6..4275cf0 100644 --- a/R/stancode.R +++ b/R/stancode.R @@ -12,8 +12,8 @@ #' stancode <- function(plant_effort = c("param", "data"), animal_pref = c("fixed", "varying"), - plant_abun = c("estimate", "relative", "absolute"), - animal_abun = c("estimate", "relative", "absolute")) { + plant_abun = c("param", "data"), + animal_abun = c("param", "data")) { # validate arguments plant_effort <- match.arg(plant_effort) animal_pref <- match.arg(animal_pref) @@ -32,29 +32,32 @@ stancode <- function(plant_effort = c("param", "data"), data = " vector[n_p] C;\n"), " real beta;\n", switch(plant_abun, - estimate = " real alpha_sigma;\n", - relative = " simplex[n_p] sigma;\n", - absolute = " vector[n_p] sigma;\n"), + param = " real alpha_sigma;\n", + data = " vector[n_p] abun_p;\n"), switch(animal_abun, - estimate = " real alpha_tau;\n", - relative = " simplex[n_a] tau;\n", - absolute = " vector[n_a] tau;\n"), + param = " real alpha_tau;\n", + data = " vector[n_a] abun_a;\n"), "}" ) # generate transformed data block scode_tdata <- paste0( "transformed data {\n", - ifelse(plant_abun == "estimate" | animal_abun == "estimate", + ifelse(plant_abun == "param" | animal_abun == "param", "// Prior parameters for abundances\n", ""), switch(plant_abun, - estimate = " vector[n_p] a_sigma = rep_vector(alpha_sigma, n_p);\n", - relative = "", - absolute = ""), + param = " vector[n_p] a_sigma = rep_vector(alpha_sigma, n_p);\n", + data = ""), + switch(animal_abun, + param = " vector[n_a] a_tau = rep_vector(alpha_tau, n_a);\n", + data = ""), + "// Normalize abundances\n", + switch(plant_abun, + param = "", + data = " simplex[n_p] sigma = abun_p / sum(abun_p);\n"), switch(animal_abun, - estimate = " vector[n_a] a_tau = rep_vector(alpha_tau, n_a);\n", - relative = "", - absolute = ""), + param = "", + data = " simplex[n_a] tau = abun_a / sum(abun_a);\n"), "// Pre-compute the marginals of M to save computation in the model loop.\n", " array[n_p] int M_rows = rep_array(0, n_p);\n", " array[n_a] int M_cols = rep_array(0, n_a);\n", @@ -79,13 +82,11 @@ stancode <- function(plant_effort = c("param", "data"), fixed = " real r;\n", varying = " vector [n_a] r;\n"), switch(plant_abun, - estimate = " simplex[n_p] sigma;\n", - relative = "", - absolute = ""), + param = " simplex[n_p] sigma;\n", + data = ""), switch(animal_abun, - estimate = " simplex[n_a] tau;\n", - relative = "", - absolute = ""), + param = " simplex[n_a] tau;\n", + data = ""), " real rho;\n", "}" ) @@ -100,13 +101,11 @@ stancode <- function(plant_effort = c("param", "data"), " r[j] ~ exponential(beta);\n", " }\n")), switch(plant_abun, - estimate = " sigma ~ dirichlet(a_sigma);\n", - relative = "", - absolute = ""), + param = " sigma ~ dirichlet(a_sigma);\n", + data = ""), switch(animal_abun, - estimate = " tau ~ dirichlet(a_tau);\n", - relative = "", - absolute = ""), + param = " tau ~ dirichlet(a_tau);\n", + data = ""), " // Global sums and parameters\n", " target += M_tot * log(C) - C;\n", " // Weighted marginals of the data matrix\n", diff --git a/man/BayesianWebs-package.Rd b/man/BayesianWebs-package.Rd index 5c5782f..d9f07e0 100644 --- a/man/BayesianWebs-package.Rd +++ b/man/BayesianWebs-package.Rd @@ -19,5 +19,10 @@ Useful links: \author{ \strong{Maintainer}: Francisco Rodriguez-Sanchez \email{f.rodriguez.sanc@gmail.com} (\href{https://orcid.org/0000-0002-7981-1599}{ORCID}) +Other contributors: +\itemize{ + \item William K. Petry \email{wpetry@ncsu.edu} (\href{https://orcid.org/0000-0002-5230-5987}{ORCID}) [contributor] +} + } \keyword{internal} diff --git a/man/fit_model.Rd b/man/fit_model.Rd index c090093..81c3408 100644 --- a/man/fit_model.Rd +++ b/man/fit_model.Rd @@ -8,8 +8,8 @@ fit_model( data = NULL, plant_effort = c("param", "data"), animal_pref = c("fixed", "varying"), - plant_abun = c("estimate", "relative", "absolute"), - animal_abun = c("estimate", "relative", "absolute"), + plant_abun = c("param", "data"), + animal_abun = c("param", "data"), beta = 0.01, alpha_sigma = 1, alpha_tau = 1, @@ -20,19 +20,19 @@ fit_model( \item{data}{A named list containing the required data, as obtained from \code{\link[=prepare_data]{prepare_data()}}.} -\item{plant_effort}{The sampling effort allocated to each plant. If unknown, \code{param} (default) estimates a single parameter during the modelling step that applies to all plants. Alternatively, "data" allows measured values of sampling effort to be supplied in \code{data}.} +\item{plant_effort}{How to model the sampling effort allocated to each plant. If unknown, \code{param} (default) estimates a single parameter during the modelling step that applies to all plants, \eqn{C}. Alternatively, "data" allows measured values of sampling effort to be supplied in \code{data}, \eqn{C_i}.} -\item{animal_pref}{Controls the relative preference of animals for linked plants. \code{fixed} (default), fits a single preference parameter for all animals during the modelling step. \code{varying} fits a separate preference parameter for each animal type.} +\item{animal_pref}{How to model the increase in visits of animals to plants with which they share a link relative to unlinked plants. \code{fixed} (default), fits a single preference parameter for all animals during the modelling step, \eqn{r}. \code{varying} fits a separate preference parameter for each animal type, \eqn{r_i}.} -\item{plant_abun}{Controls how the model accounts for plant abundances. \code{estimate} (default) estimates the latent relative abundance of each plant type. \code{relative} allows the user to pass a simplex containing the relative abundances of each plant. \code{absolute} allows the user to supply the absolute abundances of each plant.} +\item{plant_abun}{How the model treats the abundances of interacting plants. \code{param} (default) treats these abundances as a parameter that the model estimates, \eqn{sigma_i}. \code{data} treats these abundances as measured and requires the user to supply the absolute abundances of each plant in \code{data}.} -\item{animal_abun}{Controls how the model accounts for animal abundances. "estimate" (default) estimates the latent relative abundance of each animal type. "relative" allows the user to pass a simplex containing the relative abundances of each animal. \code{absolute} allows the user to supply the absolute abundances of each animal.} +\item{animal_abun}{How the model treats the abundances of interacting animals. \code{param} (default) treats these abundances as a parameter that the model estimates, \eqn{sigma_i}. \code{data} treats these abundances as measured and requires the user to supply the absolute abundances of each animal in \code{data}.} -\item{beta}{Rate parameter of exponential prior on \code{r} (preference) parameter. The defaults, 0.01, is suitable for small counts and should be increased for larger counts. Use \code{plot_prior} to visualize.} +\item{beta}{Rate parameter of exponential prior on the preference parameter or parameters \eqn{r} or \eqn{r_i}. The default, 0.01, is suitable for small counts and should be increased for larger counts. Use \code{plot_prior} to visualize.} -\item{alpha_sigma}{Concentration parameter of Dirichlet prior on \code{sigma} (relative abundances of plants). Only used when \code{plant_abun = "estimate"}. Defaults to 1. Use \code{plot_prior} to visualize.} +\item{alpha_sigma}{Concentration parameters of Dirichlet prior on the abundances of interacting plants, \eqn{sigma_i}, when \code{plant_abun = "param"}. Ignored otherwise. Defaults to 1.} -\item{alpha_tau}{Concentration parameter of Dirichlet prior on \code{tau} (relative abundances of animals). Only used when \code{animal_abun = "estimate"}. Defaults to 1. Use \code{plot_prior} to visualize.} +\item{alpha_tau}{Concentration parameters of Dirichlet prior on the abundances of interacting plants, \eqn{tau_i}, when \code{animal_abun = "param"}. Ignored otherwise. Defaults to 1.} \item{...}{Further arguments passed to \code{\link[cmdstanr:model-method-sample]{cmdstanr::sample()}}, like \code{iter_warmup}, \code{iter_sampling}, or \code{thin}, among others.} @@ -52,6 +52,6 @@ data(web) dt <- prepare_data(mat = web, plant_effort = rep(20, nrow(web))) # fit the model used by Young et al. 2021 fit <- fit_model(dt, plant_effort = "param", animal_pref = "fixed", - plant_abun = "estimate", animal_abun = "estimate") + plant_abun = "param", animal_abun = "param") \dontshow{\}) # examplesIf} } diff --git a/man/prepare_data.Rd b/man/prepare_data.Rd index f115570..7fa5da9 100644 --- a/man/prepare_data.Rd +++ b/man/prepare_data.Rd @@ -14,9 +14,9 @@ consumed per plant or species). Plants must be in rows, Animals must be in colum \item{plant_effort}{A numeric vector with the sampling effort (e.g. observation hours) spent on each plant.} -\item{plant_abun}{(optional) A numeric vector with the relative or absolute abundances of each plant.} +\item{plant_abun}{(optional) A numeric vector with the relative or absolute interacting abundances of each plant.} -\item{animal_abun}{(optional) A numeric vector with the relative or absolute abundances of each animal.} +\item{animal_abun}{(optional) A numeric vector with the relative or absolute interacting abundances of each animal.} } \value{ A named list containing data to be passed to the bipartite network model. From f4821d40e8203855c09812d8cbe393d9059755c3 Mon Sep 17 00:00:00 2001 From: wpetry Date: Tue, 11 Feb 2025 18:47:15 -0500 Subject: [PATCH 09/17] bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5489d58..948fff9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: BayesianWebs Title: Bayesian Modelling of Bipartite Networks -Version: 0.0.7 +Version: 0.0.8 Authors@R: c( person("Francisco", "Rodriguez-Sanchez", , "f.rodriguez.sanc@gmail.com", role = c("aut", "cre"), From 1c2bd520dbd771daecf9af570f1e9ef712c39e13 Mon Sep 17 00:00:00 2001 From: wpetry Date: Tue, 11 Feb 2025 18:48:16 -0500 Subject: [PATCH 10/17] rebuild pkgdown site --- docs/404.html | 2 +- docs/LICENSE.html | 2 +- docs/authors.html | 6 +- docs/deps/bootstrap-5.3.1/bootstrap.min.css | 2 +- docs/index.html | 3 +- docs/pkgdown.yml | 2 +- docs/reference/BayesianWebs-package.html | 5 +- docs/reference/check_model.html | 4 +- docs/reference/fit_model.html | 60 ++++++++++++++------ docs/reference/get_posterior.html | 4 +- docs/reference/get_seed.html | 2 +- docs/reference/index.html | 6 +- docs/reference/plot_counts_obs-1.png | Bin 36637 -> 35578 bytes docs/reference/plot_counts_obs-2.png | Bin 34905 -> 34774 bytes docs/reference/plot_counts_obs-3.png | Bin 35619 -> 35491 bytes docs/reference/plot_counts_obs.html | 2 +- docs/reference/plot_counts_pred.html | 22 ++++--- docs/reference/plot_counts_pred_obs.html | 29 ++++++---- docs/reference/plot_interaction_prob.html | 4 +- docs/reference/plot_prior.html | 4 +- docs/reference/plot_residuals.html | 18 +++--- docs/reference/predict_counts.html | 4 +- docs/reference/prepare_data.html | 22 ++++--- docs/reference/web.html | 2 +- docs/search.json | 2 +- 25 files changed, 130 insertions(+), 77 deletions(-) diff --git a/docs/404.html b/docs/404.html index 725655a..d77cbc7 100644 --- a/docs/404.html +++ b/docs/404.html @@ -20,7 +20,7 @@ BayesianWebs - 0.0.7 + 0.0.8