diff --git a/DESCRIPTION b/DESCRIPTION index 12a1522..7517d7c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: coevolve Title: Fit Bayesian Generalized Dynamic Phylogenetic Models using 'Stan' -Version: 1.0.0.9000 +Version: 1.0.0.9001 Authors@R: c(person("Scott", "Claessens", , "scott.claessens@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-3562-6981")), diff --git a/NAMESPACE b/NAMESPACE index 8efd041..87bd56d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,13 +10,16 @@ S3method(stancode,coevfit) S3method(standata,coevfit) S3method(summary,coevfit) S3method(summary,nutpie_fit) +export(coev_ancestral_states) export(coev_calculate_delta_theta) export(coev_calculate_theta) export(coev_fit) +export(coev_identify_nodes) export(coev_make_stancode) export(coev_make_standata) export(coev_plot_delta_theta) export(coev_plot_flowfield) +export(coev_plot_node_labels) export(coev_plot_pred_series) export(coev_plot_predictive_check) export(coev_plot_selection_gradient) diff --git a/NEWS.md b/NEWS.md index 7969e34..71579b9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,14 @@ ### New Features +* Added `coev_ancestral_states()` for extracting posterior estimates of + ancestral trait values at internal phylogenetic nodes, on either the + latent or response scale (#86) +* Added `coev_plot_node_labels()` for plotting the tree with internal + node IDs displayed +* Added `coev_identify_nodes()` for interactive multi-click node selection, + wrapping `ape::identify.phylo()` in a loop +* Added vignette "Ancestral State Reconstruction with coevolve" * Allow for single traits (#107) # coevolve 1.0.0 diff --git a/R/coev_ancestral_states.R b/R/coev_ancestral_states.R new file mode 100644 index 0000000..26806dd --- /dev/null +++ b/R/coev_ancestral_states.R @@ -0,0 +1,411 @@ +#' Extract ancestral state estimates from a fitted coevolutionary model +#' +#' Extracts posterior estimates of trait values at internal nodes of the +#' phylogenetic tree from a fitted \code{coevfit} object. By default, +#' estimates are returned on the latent (eta) scale; use +#' \code{scale = "response"} to apply the inverse link for each variable's +#' response distribution. The latent \code{eta} parameters in the model +#' correspond directly to nodes in the phylogeny, making ancestral state +#' reconstruction a natural byproduct of model fitting. +#' +#' @param object An object of class \code{coevfit} +#' @param variables (optional) A character vector of variable names to include. +#' If \code{NULL} (default), all variables from the fitted model are included. +#' @param nodes Which nodes to include. Either \code{"internal"} (default) for +#' internal nodes only, \code{"all"} for both tips and internal nodes, or an +#' integer vector of specific node IDs (using ape's node +#' numbering: tips = 1:N_tips, internal = (N_tips+1):total). +#' @param tree_id (optional) An integer indicating which tree to use when the +#' model was fit with a \code{multiPhylo} object. If \code{NULL} (default), +#' uses tree 1. (Integration over trees with topological uncertainty is +#' not yet implemented.) +#' @param scale Character string, either \code{"latent"} (default) to return +#' values on the latent eta scale, or \code{"response"} to apply the inverse +#' link function for each variable's response distribution. +#' @param summary Logical. If \code{TRUE} (default), returns a long-format +#' data frame with posterior point estimates and credible intervals. If +#' \code{FALSE}, returns the raw posterior draws. +#' @param prob A single numeric value between 0 and 1 indicating the desired +#' probability mass for the credible interval. Default is 0.95. +#' +#' @returns If \code{summary = TRUE}, a long-format data frame (tibble) with +#' columns: +#' \describe{ +#' \item{node}{Integer node ID in the reference tree} +#' \item{variable}{Character variable name} +#' \item{category}{For ordinal variables on the response scale, the +#' category label (\code{"cat_1"}, \code{"cat_2"}, ...). \code{NA} +#' otherwise.} +#' \item{estimate}{Posterior point estimate. For ordinal-response +#' category probabilities this is the posterior \emph{mean} (so the +#' per-node category estimates sum to 1, matching the convention of +#' \code{predict.brmsfit}); for all other cases it is the posterior +#' median.} +#' \item{lower}{Lower bound of the credible interval (per-category +#' quantile for ordinal-response rows)} +#' \item{upper}{Upper bound of the credible interval (per-category +#' quantile for ordinal-response rows)} +#' } +#' The data frame has attributes \code{ref_tree} (the phylo object used), +#' \code{prob}, and \code{scale}. +#' +#' If \code{summary = FALSE}, a list with elements: +#' \describe{ +#' \item{draws}{A 3D array (draws x nodes x variables)} +#' \item{ref_tree}{The reference phylo object} +#' \item{node_ids}{Integer vector of node IDs (matching dim 2 of draws)} +#' \item{variable_names}{Character vector of variable names} +#' } +#' +#' @examples +#' \dontrun{ +#' # fit dynamic coevolutionary model +#' fit <- coev_fit( +#' data = authority$data, +#' variables = list( +#' political_authority = "ordered_logistic", +#' religious_authority = "ordered_logistic" +#' ), +#' id = "language", +#' tree = authority$phylogeny, +#' chains = 4, +#' parallel_chains = 4, +#' seed = 1 +#' ) +#' +#' # extract ancestral states (latent scale, internal nodes, summary) +#' asr <- coev_ancestral_states(fit) +#' +#' # extract all nodes including tips +#' asr_all <- coev_ancestral_states(fit, nodes = "all") +#' +#' # extract on response scale +#' asr_resp <- coev_ancestral_states(fit, scale = "response") +#' +#' # get raw posterior draws +#' asr_draws <- coev_ancestral_states(fit, summary = FALSE) +#' } +#' +#' @seealso \code{\link{coev_fit}}, \code{\link{coev_plot_trait_values}} +#' +#' @export +coev_ancestral_states <- function(object, variables = NULL, + nodes = "internal", + tree_id = NULL, + scale = "latent", + summary = TRUE, + prob = 0.95) { + # input validation + run_checks_ancestral_states( + object, variables, nodes, tree_id, scale, summary, prob + ) + # get tree(s) from fitted model + tree <- object$tree + if (!inherits(tree, "multiPhylo")) { + tree <- list(tree) + class(tree) <- "multiPhylo" + } + n_tips <- object$stan_data$N_tips + n_seg <- object$stan_data$N_seg + var_names <- names(object$variables) + distributions <- unlist(object$variables) + j <- length(var_names) + # subset variables if requested + if (!is.null(variables)) { + var_idx <- match(variables, var_names) + var_names <- variables + } else { + var_idx <- seq_len(j) + } + # determine which tree to use + if (!is.null(tree_id)) { + t_idx <- tree_id + } else { + t_idx <- 1L + } + ref_tree <- tree[[t_idx]] + # extract posterior draws of eta + # eta has Stan dims array[N_tree, N_seg] vector[J] + # after extract_samples(): [draws, N_tree, N_seg, J] + post <- extract_samples(object) + eta_draws <- post$eta # [draws, N_tree, N_seg, J] + # subset to the selected tree + eta_t <- eta_draws[, t_idx, , , drop = FALSE] + # collapse tree dimension: [draws, N_seg, J] + eta_t <- array( + eta_t, + dim = c(dim(eta_t)[1], dim(eta_t)[3], dim(eta_t)[4]) + ) + # determine which nodes to include + # node_seq[t, i] gives the phylo node ID at position i + # eta_t[draw, k, j] is indexed by node ID k (since Stan assigns + # eta[t, node_seq[t,i]] = ..., so the second dim IS node ID) + all_node_ids <- seq_len(n_seg) + internal_mask <- all_node_ids > n_tips + if (is.character(nodes)) { + if (nodes == "internal") { + selected_nodes <- all_node_ids[internal_mask] + } else if (nodes == "all") { + selected_nodes <- all_node_ids + } + } else { + # integer vector of specific node IDs + selected_nodes <- as.integer(nodes) + } + # extract draws for selected nodes and variables + # result: [draws, length(selected_nodes), length(var_idx)] + draws_subset <- eta_t[, selected_nodes, var_idx, drop = FALSE] + # apply response-scale transform if requested + if (scale == "response") { + # compute mean of observed (non-missing) values per variable + # needed for poisson_softplus and negative_binomial_softplus + obs_means <- compute_obs_means(object, var_idx) + # get cutpoints for ordered_logistic variables + cutpoints <- extract_cutpoints(post, distributions, var_idx) + draws_subset <- apply_inverse_link( + draws_subset, distributions[var_idx], obs_means, cutpoints + ) + } + if (summary) { + # compute posterior summaries (long format: one row per + # node x variable x category) + probs <- c((1 - prob) / 2, 1 - (1 - prob) / 2) + rows <- list() + for (n_i in seq_along(selected_nodes)) { + for (v_i in seq_along(var_idx)) { + dist <- distributions[var_idx[v_i]] + if (scale == "response" && dist == "ordered_logistic") { + cat_draws <- draws_subset[[v_i]][, n_i, , drop = FALSE] + n_cats <- dim(cat_draws)[3] + for (k in seq_len(n_cats)) { + cat_k <- cat_draws[, 1, k] + # use mean so per-node category estimates sum to 1 + rows[[length(rows) + 1]] <- data.frame( + node = selected_nodes[n_i], + variable = var_names[v_i], + category = paste0("cat_", k), + estimate = mean(cat_k), + lower = stats::quantile(cat_k, probs[1], names = FALSE), + upper = stats::quantile(cat_k, probs[2], names = FALSE), + stringsAsFactors = FALSE + ) + } + } else { + if (is.list(draws_subset)) { + node_draws <- draws_subset[[v_i]][, n_i, 1] + } else { + node_draws <- draws_subset[, n_i, v_i] + } + rows[[length(rows) + 1]] <- data.frame( + node = selected_nodes[n_i], + variable = var_names[v_i], + category = NA_character_, + estimate = stats::median(node_draws), + lower = stats::quantile(node_draws, probs[1], names = FALSE), + upper = stats::quantile(node_draws, probs[2], names = FALSE), + stringsAsFactors = FALSE + ) + } + } + } + result <- do.call(dplyr::bind_rows, rows) + result <- tibble::as_tibble(result) + attr(result, "ref_tree") <- ref_tree + attr(result, "prob") <- prob + attr(result, "scale") <- scale + result + } else { + # return raw draws + if (!is.list(draws_subset)) { + dimnames(draws_subset) <- list( + draw = NULL, + node = selected_nodes, + variable = var_names + ) + } + list( + draws = draws_subset, + ref_tree = ref_tree, + node_ids = selected_nodes, + variable_names = var_names + ) + } +} + +#' Compute mean of observed (non-missing) values per variable +#' @noRd +compute_obs_means <- function(object, var_idx) { + var_names <- names(object$variables) + data <- object$data + obs_means <- numeric(length(var_idx)) + for (i in seq_along(var_idx)) { + vname <- var_names[var_idx[i]] + vals <- data[[vname]] + # remove NAs and convert to numeric + vals <- as.numeric(vals[!is.na(vals)]) + obs_means[i] <- mean(vals) + } + obs_means +} + +#' Extract cutpoints for ordered_logistic variables from posterior +#' @noRd +extract_cutpoints <- function(post, distributions, var_idx) { + cutpoints <- list() + all_dists <- distributions + for (i in seq_along(var_idx)) { + j <- var_idx[i] + if (all_dists[j] == "ordered_logistic") { + # cutpoints are named c1, c2, ... in posterior (by original position) + cp_name <- paste0("c", j) + if (cp_name %in% names(post)) { + cutpoints[[i]] <- post[[cp_name]] # [draws, n_cutpoints] + } + } else { + cutpoints[[i]] <- NULL + } + } + cutpoints +} + +#' Apply inverse link function to draws +#' @noRd +apply_inverse_link <- function(draws, distributions, obs_means, cutpoints) { + n_draws <- dim(draws)[1] + n_nodes <- dim(draws)[2] + n_vars <- dim(draws)[3] + has_ordinal <- any(distributions == "ordered_logistic") + if (has_ordinal) { + # need to return a list since ordinal variables have different dimensions + result <- vector("list", n_vars) + for (v in seq_len(n_vars)) { + dist <- distributions[v] + eta_v <- draws[, , v] # [draws, nodes] + if (is.null(dim(eta_v))) { + eta_v <- matrix(eta_v, nrow = n_draws, ncol = n_nodes) + } + if (dist == "ordered_logistic" && !is.null(cutpoints[[v]])) { + cp <- cutpoints[[v]] # [draws, n_cutpoints] + if (is.null(dim(cp))) cp <- matrix(cp, ncol = 1) # nocov + n_cats <- ncol(cp) + 1 + # result: [draws, nodes, categories] + cat_probs <- array(NA, dim = c(n_draws, n_nodes, n_cats)) + for (d in seq_len(n_draws)) { + for (n in seq_len(n_nodes)) { + cat_probs[d, n, ] <- ordered_logistic_probs( + eta_v[d, n], cp[d, ] + ) + } + } + result[[v]] <- cat_probs + } else { + # wrap scalar transform in 3D array [draws, nodes, 1] + transformed <- transform_eta(eta_v, dist, obs_means[v]) + result[[v]] <- array(transformed, dim = c(n_draws, n_nodes, 1)) + } + } + result + } else { + # all variables are non-ordinal: apply element-wise transforms + for (v in seq_len(n_vars)) { + eta_v <- draws[, , v] + if (is.null(dim(eta_v))) { + eta_v <- matrix(eta_v, nrow = n_draws, ncol = n_nodes) + } + draws[, , v] <- transform_eta(eta_v, distributions[v], obs_means[v]) + } + draws + } +} + +#' Apply scalar inverse link for a single distribution +#' @noRd +transform_eta <- function(eta, distribution, obs_mean = NULL) { + switch( + distribution, + "normal" = eta, + "bernoulli_logit" = stats::plogis(eta), + "poisson_softplus" = obs_mean * log1p(exp(eta)), + "negative_binomial_softplus" = obs_mean * log1p(exp(eta)), + "gamma_log" = exp(eta), + eta # nocov + ) +} + +#' Compute ordered logistic probabilities from eta and cutpoints +#' @noRd +ordered_logistic_probs <- function(eta, cutpoints) { + n_cats <- length(cutpoints) + 1 + probs <- numeric(n_cats) + for (k in seq_len(n_cats)) { + if (k == 1) { + probs[k] <- stats::plogis(cutpoints[1] - eta) + } else if (k == n_cats) { + probs[k] <- 1 - stats::plogis(cutpoints[k - 1] - eta) + } else { + probs[k] <- stats::plogis(cutpoints[k] - eta) - + stats::plogis(cutpoints[k - 1] - eta) + } + } + probs +} + +#' Input validation for coev_ancestral_states +#' @noRd +run_checks_ancestral_states <- function(object, variables, nodes, + tree_id, scale, + summary, prob) { + if (!methods::is(object, "coevfit")) { + stop2( + paste0( + "Argument 'object' must be a fitted coevolutionary model ", + "of class coevfit." + ) + ) + } + if (!is.null(variables)) { + if (!is.character(variables)) { + stop2("Argument 'variables' must be a character vector.") + } + if (!all(variables %in% names(object$variables))) { + stop2( + paste0( + "Argument 'variables' contains variable names that are not ", + "included in the fitted model." + ) + ) + } + } + if (is.character(nodes)) { + if (!(nodes %in% c("internal", "all"))) { + stop2( + "Argument 'nodes' must be \"internal\", \"all\", or an integer vector." + ) + } + } else if (!is.numeric(nodes)) { + stop2( + "Argument 'nodes' must be \"internal\", \"all\", or an integer vector." + ) + } + if (!is.null(tree_id)) { + if (!is.numeric(tree_id) || length(tree_id) != 1 || + as.integer(tree_id) != tree_id) { + stop2("Argument 'tree_id' must be a single integer.") + } + if (tree_id < 1 || tree_id > object$stan_data$N_tree) { + stop2("Argument 'tree_id' must be between 1 and the number of trees.") + } + } + if (!is.character(scale) || length(scale) != 1 || + !(scale %in% c("latent", "response"))) { + stop2("Argument 'scale' must be \"latent\" or \"response\".") + } + if (!is.logical(summary) || length(summary) != 1) { + stop2("Argument 'summary' must be logical.") + } + if (!is.numeric(prob) || length(prob) != 1 || prob <= 0 || prob >= 1) { + stop2( + "Argument 'prob' must be a single numeric value between 0 and 1." + ) + } +} diff --git a/R/coev_identify_nodes.R b/R/coev_identify_nodes.R new file mode 100644 index 0000000..b2aa759 --- /dev/null +++ b/R/coev_identify_nodes.R @@ -0,0 +1,73 @@ +#' Interactively select multiple nodes from a plotted phylogeny +#' +#' A convenience wrapper around \code{\link[ape]{identify.phylo}} that +#' allows the user to click on \emph{multiple} internal nodes in succession +#' and returns the collected node IDs. By default, +#' \code{\link[ape]{identify.phylo}} returns after a single click; this +#' function calls it in a loop until the user signals completion (by +#' clicking outside the tree, pressing Esc, or right-clicking, depending +#' on graphics device) or until \code{n} clicks have been collected. +#' +#' Intended workflow: +#' +#' \preformatted{ +#' tree <- coev_plot_node_labels(fit) +#' nodes <- coev_identify_nodes(tree) +#' coev_ancestral_states(fit, nodes = nodes) +#' } +#' +#' @param tree A \code{phylo} object, typically the value returned (invisibly) +#' by \code{\link{coev_plot_node_labels}}. +#' @param n (optional) Maximum number of nodes to collect. If \code{NULL} +#' (default), the loop continues until the user cancels. +#' +#' @returns An integer vector of unique node IDs, in the order they were +#' clicked. +#' +#' @author Erik Ringen \email{erikjacob.ringen@@uzh.ch} +#' +#' @seealso \code{\link{coev_plot_node_labels}}, +#' \code{\link{coev_ancestral_states}}, +#' \code{\link[ape]{identify.phylo}} +#' +#' @examples +#' \dontrun{ +#' # plot tree with internal node IDs, then click on nodes of interest +#' tree <- coev_plot_node_labels(fit) +#' nodes <- coev_identify_nodes(tree) +#' coev_ancestral_states(fit, nodes = nodes) +#' } +#' +#' @export +coev_identify_nodes <- function(tree, n = NULL) { + if (!inherits(tree, "phylo")) { + stop2("Argument 'tree' must be a 'phylo' object.") + } + if (!is.null(n)) { + if (!is.numeric(n) || length(n) != 1 || n < 1 || as.integer(n) != n) { + stop2("Argument 'n' must be a single positive integer.") + } + } + if (!interactive()) { + stop2("coev_identify_nodes() requires an interactive R session.") # nocov + } + # nocov start: interactive code path; cannot be exercised in non-interactive + # test runs, but the loop logic is straightforward. + collected <- integer() + i <- 0L + repeat { + i <- i + 1L + if (!is.null(n) && i > n) break + # dispatch to ape::identify.phylo via the graphics::identify generic; + # ape only registers identify.phylo as an S3 method (not a plain + # export), so calling ape::identify.phylo directly is invalid + sel <- tryCatch( + graphics::identify(tree, quiet = TRUE), + error = function(e) NULL + ) + if (is.null(sel) || length(sel$nodes) == 0L) break + collected <- c(collected, sel$nodes) + } + unique(collected) + # nocov end +} diff --git a/R/coev_plot_node_labels.R b/R/coev_plot_node_labels.R new file mode 100644 index 0000000..877ef16 --- /dev/null +++ b/R/coev_plot_node_labels.R @@ -0,0 +1,119 @@ +#' Plot phylogenetic tree with node labels from a fitted coevolutionary model +#' +#' Plots the phylogenetic tree from a fitted \code{coevfit} object with +#' internal node IDs displayed, making it easy to identify node numbers +#' for use with \code{\link{coev_ancestral_states}}. The tree is returned +#' invisibly so it can be passed to \code{\link[ape]{identify.phylo}} for +#' interactive node selection. +#' +#' @param object An object of class \code{coevfit} +#' @param tip_labels Logical. If \code{FALSE} (default), tip labels are +#' hidden to reduce clutter. Set to \code{TRUE} to display tip labels. +#' @param node_cex Numeric scaling factor for node label size. If \code{NULL} +#' (default), the size is chosen automatically based on the number of tips. +#' @param node_col Color for node labels. Default is \code{"#B2182B"} (red). +#' @param ... Additional arguments passed to \code{\link[ape]{plot.phylo}} +#' (e.g. \code{cex} for tip label size, \code{edge.width}, \code{type}). +#' +#' @returns The \code{phylo} tree object, returned invisibly. +#' +#' @author Erik Ringen \email{erikjacob.ringen@@uzh.ch} +#' +#' @details This function is a convenience wrapper around +#' \code{\link[ape]{plot.phylo}} and \code{\link[ape]{nodelabels}} that +#' displays internal node IDs using ape's numbering convention: tips are +#' numbered \code{1:N_tips} and internal nodes are +#' \code{(N_tips + 1):(N_tips + N_internal)}. +#' +#' Node label size scales automatically with tree size when +#' \code{node_cex = NULL}. For large trees, consider hiding tip labels +#' (the default) and using the interactive workflow below to identify +#' nodes of interest. +#' +#' In an interactive R session, the returned tree can be passed to +#' \code{identify()} to click on nodes and retrieve their IDs: +#' +#' \preformatted{ +#' tree <- coev_plot_node_labels(fit) +#' selected <- identify(tree) +#' coev_ancestral_states(fit, nodes = selected$nodes) +#' } +#' +#' @seealso \code{\link{coev_ancestral_states}}, +#' \code{\link[ape]{plot.phylo}}, \code{\link[ape]{identify.phylo}} +#' +#' @examples +#' \dontrun{ +#' # fit dynamic coevolutionary model +#' fit <- coev_fit( +#' data = authority$data, +#' variables = list( +#' political_authority = "ordered_logistic", +#' religious_authority = "ordered_logistic" +#' ), +#' id = "language", +#' tree = authority$phylogeny, +#' chains = 4, +#' parallel_chains = 4, +#' seed = 1 +#' ) +#' +#' # plot tree with node labels (no tip labels by default) +#' coev_plot_node_labels(fit) +#' +#' # show tip labels and customise node appearance +#' coev_plot_node_labels(fit, tip_labels = TRUE, node_cex = 0.4, +#' cex = 0.4) +#' +#' # interactive node selection (in interactive R sessions) +#' tree <- coev_plot_node_labels(fit) +#' selected <- identify(tree) +#' coev_ancestral_states(fit, nodes = selected$nodes) +#' } +#' +#' @export +coev_plot_node_labels <- function(object, tip_labels = FALSE, + node_cex = NULL, node_col = "#B2182B", + ...) { + if (!methods::is(object, "coevfit")) { + stop2( + paste0( + "Argument 'object' must be a fitted coevolutionary model ", + "of class coevfit." + ) + ) + } + if (!is.logical(tip_labels) || length(tip_labels) != 1) { + stop2("Argument 'tip_labels' must be TRUE or FALSE.") + } + if (!is.null(node_cex)) { + if (!is.numeric(node_cex) || length(node_cex) != 1 || node_cex <= 0) { + stop2("Argument 'node_cex' must be a single positive number.") + } + } + tree <- object$tree + if (inherits(tree, "multiPhylo")) tree <- tree[[1]] + n_tips <- length(tree$tip.label) + internal_ids <- (n_tips + 1):(n_tips + tree$Nnode) + # auto-scale node label size based on number of tips + if (is.null(node_cex)) { + node_cex <- min(0.8, max(0.25, 5 / sqrt(n_tips))) + } + ape::plot.phylo( + tree, + show.tip.label = tip_labels, + edge.width = 1.5, + edge.color = "grey50", + no.margin = !tip_labels, + ... + ) + ape::nodelabels( + text = internal_ids, + cex = node_cex, + frame = "none", + col = node_col, + font = 2, + adj = c(-0.3, 0.5) + ) + invisible(tree) +} diff --git a/_pkgdown.yml b/_pkgdown.yml index 079ecca..b9112bb 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,3 +1,8 @@ url: https://scottclaessens.github.io/coevolve/ template: math-rendering: mathjax +articles: +- title: Vignettes + contents: + - coevolve + - ancestral_states diff --git a/man/coev_ancestral_states.Rd b/man/coev_ancestral_states.Rd new file mode 100644 index 0000000..4948d7c --- /dev/null +++ b/man/coev_ancestral_states.Rd @@ -0,0 +1,115 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/coev_ancestral_states.R +\name{coev_ancestral_states} +\alias{coev_ancestral_states} +\title{Extract ancestral state estimates from a fitted coevolutionary model} +\usage{ +coev_ancestral_states( + object, + variables = NULL, + nodes = "internal", + tree_id = NULL, + scale = "latent", + summary = TRUE, + prob = 0.95 +) +} +\arguments{ +\item{object}{An object of class \code{coevfit}} + +\item{variables}{(optional) A character vector of variable names to include. +If \code{NULL} (default), all variables from the fitted model are included.} + +\item{nodes}{Which nodes to include. Either \code{"internal"} (default) for +internal nodes only, \code{"all"} for both tips and internal nodes, or an +integer vector of specific node IDs (using ape's node +numbering: tips = 1:N_tips, internal = (N_tips+1):total).} + +\item{tree_id}{(optional) An integer indicating which tree to use when the +model was fit with a \code{multiPhylo} object. If \code{NULL} (default), +uses tree 1. (Integration over trees with topological uncertainty is +not yet implemented.)} + +\item{scale}{Character string, either \code{"latent"} (default) to return +values on the latent eta scale, or \code{"response"} to apply the inverse +link function for each variable's response distribution.} + +\item{summary}{Logical. If \code{TRUE} (default), returns a long-format +data frame with posterior point estimates and credible intervals. If +\code{FALSE}, returns the raw posterior draws.} + +\item{prob}{A single numeric value between 0 and 1 indicating the desired +probability mass for the credible interval. Default is 0.95.} +} +\value{ +If \code{summary = TRUE}, a long-format data frame (tibble) with +columns: +\describe{ +\item{node}{Integer node ID in the reference tree} +\item{variable}{Character variable name} +\item{category}{For ordinal variables on the response scale, the +category label (\code{"cat_1"}, \code{"cat_2"}, ...). \code{NA} +otherwise.} +\item{estimate}{Posterior point estimate. For ordinal-response +category probabilities this is the posterior \emph{mean} (so the +per-node category estimates sum to 1, matching the convention of +\code{predict.brmsfit}); for all other cases it is the posterior +median.} +\item{lower}{Lower bound of the credible interval (per-category +quantile for ordinal-response rows)} +\item{upper}{Upper bound of the credible interval (per-category +quantile for ordinal-response rows)} +} +The data frame has attributes \code{ref_tree} (the phylo object used), +\code{prob}, and \code{scale}. + +If \code{summary = FALSE}, a list with elements: +\describe{ +\item{draws}{A 3D array (draws x nodes x variables)} +\item{ref_tree}{The reference phylo object} +\item{node_ids}{Integer vector of node IDs (matching dim 2 of draws)} +\item{variable_names}{Character vector of variable names} +} +} +\description{ +Extracts posterior estimates of trait values at internal nodes of the +phylogenetic tree from a fitted \code{coevfit} object. By default, +estimates are returned on the latent (eta) scale; use +\code{scale = "response"} to apply the inverse link for each variable's +response distribution. The latent \code{eta} parameters in the model +correspond directly to nodes in the phylogeny, making ancestral state +reconstruction a natural byproduct of model fitting. +} +\examples{ +\dontrun{ +# fit dynamic coevolutionary model +fit <- coev_fit( + data = authority$data, + variables = list( + political_authority = "ordered_logistic", + religious_authority = "ordered_logistic" + ), + id = "language", + tree = authority$phylogeny, + chains = 4, + parallel_chains = 4, + seed = 1 +) + +# extract ancestral states (latent scale, internal nodes, summary) +asr <- coev_ancestral_states(fit) + +# extract all nodes including tips +asr_all <- coev_ancestral_states(fit, nodes = "all") + +# extract on response scale +asr_resp <- coev_ancestral_states(fit, scale = "response") + +# get raw posterior draws +asr_draws <- coev_ancestral_states(fit, summary = FALSE) +} + +} +\seealso{ +\code{\link{coev_fit}}, \code{\link{coev_plot_trait_values}} +} diff --git a/man/coev_identify_nodes.Rd b/man/coev_identify_nodes.Rd new file mode 100644 index 0000000..4932f8b --- /dev/null +++ b/man/coev_identify_nodes.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/coev_identify_nodes.R +\name{coev_identify_nodes} +\alias{coev_identify_nodes} +\title{Interactively select multiple nodes from a plotted phylogeny} +\usage{ +coev_identify_nodes(tree, n = NULL) +} +\arguments{ +\item{tree}{A \code{phylo} object, typically the value returned (invisibly) +by \code{\link{coev_plot_node_labels}}.} + +\item{n}{(optional) Maximum number of nodes to collect. If \code{NULL} +(default), the loop continues until the user cancels.} +} +\value{ +An integer vector of unique node IDs, in the order they were +clicked. +} +\description{ +A convenience wrapper around \code{\link[ape]{identify.phylo}} that +allows the user to click on \emph{multiple} internal nodes in succession +and returns the collected node IDs. By default, +\code{\link[ape]{identify.phylo}} returns after a single click; this +function calls it in a loop until the user signals completion (by +clicking outside the tree, pressing Esc, or right-clicking, depending +on graphics device) or until \code{n} clicks have been collected. +} +\details{ +Intended workflow: + +\preformatted{ +tree <- coev_plot_node_labels(fit) +nodes <- coev_identify_nodes(tree) +coev_ancestral_states(fit, nodes = nodes) +} +} +\examples{ +\dontrun{ +# plot tree with internal node IDs, then click on nodes of interest +tree <- coev_plot_node_labels(fit) +nodes <- coev_identify_nodes(tree) +coev_ancestral_states(fit, nodes = nodes) +} + +} +\seealso{ +\code{\link{coev_plot_node_labels}}, +\code{\link{coev_ancestral_states}}, +\code{\link[ape]{identify.phylo}} +} +\author{ +Erik Ringen \email{erikjacob.ringen@uzh.ch} +} diff --git a/man/coev_plot_node_labels.Rd b/man/coev_plot_node_labels.Rd new file mode 100644 index 0000000..275cb37 --- /dev/null +++ b/man/coev_plot_node_labels.Rd @@ -0,0 +1,96 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/coev_plot_node_labels.R +\name{coev_plot_node_labels} +\alias{coev_plot_node_labels} +\title{Plot phylogenetic tree with node labels from a fitted coevolutionary model} +\usage{ +coev_plot_node_labels( + object, + tip_labels = FALSE, + node_cex = NULL, + node_col = "#B2182B", + ... +) +} +\arguments{ +\item{object}{An object of class \code{coevfit}} + +\item{tip_labels}{Logical. If \code{FALSE} (default), tip labels are +hidden to reduce clutter. Set to \code{TRUE} to display tip labels.} + +\item{node_cex}{Numeric scaling factor for node label size. If \code{NULL} +(default), the size is chosen automatically based on the number of tips.} + +\item{node_col}{Color for node labels. Default is \code{"#B2182B"} (red).} + +\item{...}{Additional arguments passed to \code{\link[ape]{plot.phylo}} +(e.g. \code{cex} for tip label size, \code{edge.width}, \code{type}).} +} +\value{ +The \code{phylo} tree object, returned invisibly. +} +\description{ +Plots the phylogenetic tree from a fitted \code{coevfit} object with +internal node IDs displayed, making it easy to identify node numbers +for use with \code{\link{coev_ancestral_states}}. The tree is returned +invisibly so it can be passed to \code{\link[ape]{identify.phylo}} for +interactive node selection. +} +\details{ +This function is a convenience wrapper around +\code{\link[ape]{plot.phylo}} and \code{\link[ape]{nodelabels}} that +displays internal node IDs using ape's numbering convention: tips are +numbered \code{1:N_tips} and internal nodes are +\code{(N_tips + 1):(N_tips + N_internal)}. + +Node label size scales automatically with tree size when +\code{node_cex = NULL}. For large trees, consider hiding tip labels +(the default) and using the interactive workflow below to identify +nodes of interest. + +In an interactive R session, the returned tree can be passed to +\code{identify()} to click on nodes and retrieve their IDs: + +\preformatted{ + tree <- coev_plot_node_labels(fit) + selected <- identify(tree) + coev_ancestral_states(fit, nodes = selected$nodes) + } +} +\examples{ +\dontrun{ +# fit dynamic coevolutionary model +fit <- coev_fit( + data = authority$data, + variables = list( + political_authority = "ordered_logistic", + religious_authority = "ordered_logistic" + ), + id = "language", + tree = authority$phylogeny, + chains = 4, + parallel_chains = 4, + seed = 1 +) + +# plot tree with node labels (no tip labels by default) +coev_plot_node_labels(fit) + +# show tip labels and customise node appearance +coev_plot_node_labels(fit, tip_labels = TRUE, node_cex = 0.4, + cex = 0.4) + +# interactive node selection (in interactive R sessions) +tree <- coev_plot_node_labels(fit) +selected <- identify(tree) +coev_ancestral_states(fit, nodes = selected$nodes) +} + +} +\seealso{ +\code{\link{coev_ancestral_states}}, +\code{\link[ape]{plot.phylo}}, \code{\link[ape]{identify.phylo}} +} +\author{ +Erik Ringen \email{erikjacob.ringen@uzh.ch} +} diff --git a/tests/testthat/test-coev_ancestral_states.R b/tests/testthat/test-coev_ancestral_states.R new file mode 100644 index 0000000..62d6d0d --- /dev/null +++ b/tests/testthat/test-coev_ancestral_states.R @@ -0,0 +1,307 @@ +test_that("coev_ancestral_states() produces expected errors", { + m01 <- readRDS(test_path("fixtures", "coevfit_example_01.rds")) + m01 <- reload_fit(m01, filename = "coevfit_example_01-1.csv") + expect_error( + coev_ancestral_states(object = "fail"), + "Argument 'object' must be a fitted coevolutionary model of class coevfit.", + fixed = TRUE + ) + expect_error( + coev_ancestral_states(object = m01, variables = 123), + "Argument 'variables' must be a character vector.", + fixed = TRUE + ) + expect_error( + coev_ancestral_states(object = m01, variables = "nonexistent"), + paste0( + "Argument 'variables' contains variable names that are not ", + "included in the fitted model." + ), + fixed = TRUE + ) + expect_error( + coev_ancestral_states(object = m01, nodes = "bad_value"), + "Argument 'nodes' must be", + fixed = TRUE + ) + expect_error( + coev_ancestral_states(object = m01, nodes = TRUE), + "Argument 'nodes' must be", + fixed = TRUE + ) + expect_error( + coev_ancestral_states(object = m01, tree_id = "fail"), + "Argument 'tree_id' must be a single integer.", + fixed = TRUE + ) + expect_error( + coev_ancestral_states(object = m01, tree_id = 99), + "Argument 'tree_id' must be between 1 and the number of trees.", + fixed = TRUE + ) + expect_error( + coev_ancestral_states(object = m01, scale = "bad"), + "Argument 'scale' must be", + fixed = TRUE + ) + expect_error( + coev_ancestral_states(object = m01, summary = "fail"), + "Argument 'summary' must be logical.", + fixed = TRUE + ) + expect_error( + coev_ancestral_states(object = m01, prob = 1.5), + "Argument 'prob' must be a single numeric value between 0 and 1.", + fixed = TRUE + ) + expect_error( + coev_ancestral_states(object = m01, prob = -0.1), + "Argument 'prob' must be a single numeric value between 0 and 1.", + fixed = TRUE + ) +}) + +test_that("coev_ancestral_states() returns correct structure", { + m01 <- readRDS(test_path("fixtures", "coevfit_example_01.rds")) + m01 <- reload_fit(m01, filename = "coevfit_example_01-1.csv") + sw <- suppressWarnings + result <- sw(coev_ancestral_states(m01)) + + expect_true(is.data.frame(result)) + expect_true(all( + c("node", "variable", "category", "estimate", "lower", "upper") %in% + names(result) + )) + expect_false("clade_pp" %in% names(result)) + # latent scale: category is always NA + expect_true(all(is.na(result$category))) + n_tips <- m01$stan_data$N_tips + n_internal <- n_tips - 1 + n_vars <- length(m01$variables) + expect_equal(nrow(result), n_internal * n_vars) + expect_true(all(result$node > n_tips)) + expect_equal( + sort(unique(result$variable)), + sort(names(m01$variables)) + ) + expect_true(all(is.finite(result$estimate))) + expect_true(all(result$lower <= result$estimate)) + expect_true(all(result$estimate <= result$upper)) + expect_true(!is.null(attr(result, "ref_tree"))) + expect_true(inherits(attr(result, "ref_tree"), "phylo")) +}) + +test_that("coev_ancestral_states() nodes = 'all' includes tips", { + m01 <- readRDS(test_path("fixtures", "coevfit_example_01.rds")) + m01 <- reload_fit(m01, filename = "coevfit_example_01-1.csv") + sw <- suppressWarnings + # latent scale: one row per node x variable (no category expansion) + result <- sw(coev_ancestral_states(m01, nodes = "all")) + n_tips <- m01$stan_data$N_tips + n_seg <- m01$stan_data$N_seg + n_vars <- length(m01$variables) + expect_equal(nrow(result), n_seg * n_vars) + expect_true(any(result$node <= n_tips)) + expect_true(any(result$node > n_tips)) +}) + +test_that("coev_ancestral_states() nodes = integer vector works", { + m01 <- readRDS(test_path("fixtures", "coevfit_example_01.rds")) + m01 <- reload_fit(m01, filename = "coevfit_example_01-1.csv") + sw <- suppressWarnings + n_tips <- m01$stan_data$N_tips + target_nodes <- c(n_tips + 1, n_tips + 2) + result <- sw(coev_ancestral_states(m01, nodes = target_nodes)) + n_vars <- length(m01$variables) + expect_equal(nrow(result), length(target_nodes) * n_vars) + expect_equal(sort(unique(result$node)), sort(target_nodes)) +}) + +test_that("coev_ancestral_states() summary = FALSE returns draws", { + m01 <- readRDS(test_path("fixtures", "coevfit_example_01.rds")) + m01 <- reload_fit(m01, filename = "coevfit_example_01-1.csv") + sw <- suppressWarnings + result <- sw(coev_ancestral_states(m01, summary = FALSE)) + expect_true(is.list(result)) + expect_true("draws" %in% names(result)) + expect_true("ref_tree" %in% names(result)) + expect_true("variable_names" %in% names(result)) + expect_equal(length(dim(result$draws)), 3) + n_tips <- m01$stan_data$N_tips + n_internal <- n_tips - 1 + n_vars <- length(m01$variables) + expect_equal(dim(result$draws)[2], n_internal) + expect_equal(dim(result$draws)[3], n_vars) + expect_true(all(is.finite(result$draws))) +}) + +test_that("coev_ancestral_states() variables subsets correctly", { + m01 <- readRDS(test_path("fixtures", "coevfit_example_01.rds")) + m01 <- reload_fit(m01, filename = "coevfit_example_01-1.csv") + sw <- suppressWarnings + result <- sw(coev_ancestral_states(m01, variables = c("u", "v"))) + expect_equal(sort(unique(result$variable)), sort(c("u", "v"))) + n_tips <- m01$stan_data$N_tips + n_internal <- n_tips - 1 + expect_equal(nrow(result), n_internal * 2) +}) + +test_that("coev_ancestral_states() prob changes CI width", { + m01 <- readRDS(test_path("fixtures", "coevfit_example_01.rds")) + m01 <- reload_fit(m01, filename = "coevfit_example_01-1.csv") + sw <- suppressWarnings + narrow <- sw(coev_ancestral_states(m01, prob = 0.5)) + wide <- sw(coev_ancestral_states(m01, prob = 0.99)) + ci_width_narrow <- narrow$upper - narrow$lower + ci_width_wide <- wide$upper - wide$lower + expect_true(all(ci_width_wide >= ci_width_narrow - 1e-10)) +}) + +test_that("coev_ancestral_states() bernoulli response is [0,1]", { + m09 <- readRDS(test_path("fixtures", "coevfit_example_09.rds")) + m09 <- reload_fit(m09, filename = "coevfit_example_09-1.csv") + sw <- suppressWarnings + result <- sw(coev_ancestral_states(m09, scale = "response")) + expect_true(all(result$estimate >= 0 & result$estimate <= 1)) + expect_true(all(result$lower >= 0)) + expect_true(all(result$upper <= 1)) +}) + +test_that("coev_ancestral_states() poisson response is positive", { + m04 <- readRDS(test_path("fixtures", "coevfit_example_04.rds")) + m04 <- reload_fit(m04, filename = "coevfit_example_04-1.csv") + sw <- suppressWarnings + result <- sw(coev_ancestral_states(m04, scale = "response")) + expect_true(all(result$estimate > 0)) + expect_true(all(result$lower >= 0)) +}) + +test_that("coev_ancestral_states() normal response = latent", { + m10 <- readRDS(test_path("fixtures", "coevfit_example_10.rds")) + m10 <- reload_fit(m10, filename = "coevfit_example_10-1.csv") + sw <- suppressWarnings + latent <- sw(coev_ancestral_states(m10, scale = "latent")) + response <- sw(coev_ancestral_states(m10, scale = "response")) + expect_equal(latent$estimate, response$estimate, tolerance = 1e-10) + expect_equal(latent$lower, response$lower, tolerance = 1e-10) + expect_equal(latent$upper, response$upper, tolerance = 1e-10) +}) + +test_that("coev_ancestral_states() gamma response is positive", { + m01 <- readRDS(test_path("fixtures", "coevfit_example_01.rds")) + m01 <- reload_fit(m01, filename = "coevfit_example_01-1.csv") + sw <- suppressWarnings + result <- sw(coev_ancestral_states( + m01, variables = "u", scale = "response" + )) + expect_true(all(result$estimate > 0)) + expect_true(all(result$lower > 0)) +}) + +test_that("coev_ancestral_states() ordinal response uses category column", { + m02 <- readRDS(test_path("fixtures", "coevfit_example_02.rds")) + m02 <- reload_fit(m02, filename = "coevfit_example_02-1.csv") + sw <- suppressWarnings + result <- sw(coev_ancestral_states( + m02, variables = "x", scale = "response" + )) + # long format: category column populated for ordinal-response rows + expect_true("category" %in% names(result)) + expect_true(all(grepl("^cat_", result$category))) + expect_true(all(result$estimate >= 0 & result$estimate <= 1)) + expect_true(all(result$lower >= 0 & result$upper <= 1)) + # per-node category estimates should sum to 1 exactly + # (means distribute over sums) + sums_per_node <- vapply( + split(result$estimate, result$node), + sum, numeric(1) + ) + expect_equal(unname(sums_per_node), rep(1, length(sums_per_node)), + tolerance = 1e-10) +}) + +test_that("coev_ancestral_states() mixed ordinal/non-ordinal output", { + # m02 has bernoulli_logit + ordered_logistic + m02 <- readRDS(test_path("fixtures", "coevfit_example_02.rds")) + m02 <- reload_fit(m02, filename = "coevfit_example_02-1.csv") + sw <- suppressWarnings + result <- sw(coev_ancestral_states(m02, scale = "response")) + ord_vars <- names(m02$variables)[ + unlist(m02$variables) == "ordered_logistic" + ] + non_ord_vars <- setdiff(names(m02$variables), ord_vars) + # category populated only for ordinal-response rows + expect_true(all(is.na(result$category[result$variable %in% non_ord_vars]))) + expect_true(all(!is.na(result$category[result$variable %in% ord_vars]))) + # estimate, lower, upper columns populated for every row + expect_true(all(is.finite(result$estimate))) + expect_true(all(is.finite(result$lower))) + expect_true(all(is.finite(result$upper))) +}) + +test_that("coev_ancestral_states() multi-tree tree_id works", { + m08 <- readRDS(test_path("fixtures", "coevfit_example_08.rds")) + m08 <- reload_fit(m08, filename = "coevfit_example_08-1.csv") + sw <- suppressWarnings + result1 <- sw(coev_ancestral_states(m08, tree_id = 1)) + expect_true(is.data.frame(result1)) + expect_false("clade_pp" %in% names(result1)) + result2 <- sw(coev_ancestral_states(m08, tree_id = 2)) + expect_true(is.data.frame(result2)) +}) + +test_that("coev_ancestral_states() multi-tree default = tree 1", { + m08 <- readRDS(test_path("fixtures", "coevfit_example_08.rds")) + m08 <- reload_fit(m08, filename = "coevfit_example_08-1.csv") + sw <- suppressWarnings + result_default <- sw(coev_ancestral_states(m08)) + result_t1 <- sw(coev_ancestral_states(m08, tree_id = 1)) + expect_equal(result_default$estimate, result_t1$estimate) +}) + +test_that("coev_ancestral_states() single-node response works (ordinal)", { + m02 <- readRDS(test_path("fixtures", "coevfit_example_02.rds")) + m02 <- reload_fit(m02, filename = "coevfit_example_02-1.csv") + sw <- suppressWarnings + n_tips <- m02$stan_data$N_tips + result <- sw(coev_ancestral_states( + m02, variables = "x", nodes = n_tips + 1L, scale = "response" + )) + expect_true(is.data.frame(result)) + # one row per category for the single ordinal variable + expect_true(all(grepl("^cat_", result$category))) + expect_true(nrow(result) >= 2) +}) + +test_that("coev_ancestral_states() single-node response works (non-ordinal)", { + m09 <- readRDS(test_path("fixtures", "coevfit_example_09.rds")) + m09 <- reload_fit(m09, filename = "coevfit_example_09-1.csv") + sw <- suppressWarnings + n_tips <- m09$stan_data$N_tips + result <- sw(coev_ancestral_states( + m09, nodes = n_tips + 1L, scale = "response" + )) + expect_true(is.data.frame(result)) + expect_equal(nrow(result), length(m09$variables)) + expect_true(all(result$estimate >= 0 & result$estimate <= 1)) +}) + +test_that("coev_ancestral_states() works across fixture models", { + sw <- suppressWarnings + fixtures <- c( + "01", "02", "04", "05", "06", "08", "09", "10" + ) + for (id in fixtures) { + rds <- paste0("coevfit_example_", id, ".rds") + csv <- paste0("coevfit_example_", id, "-1.csv") + m <- readRDS(test_path("fixtures", rds)) + m <- reload_fit(m, filename = csv) + expect_no_error(sw(coev_ancestral_states(m))) + result <- sw(coev_ancestral_states(m)) + expect_true(is.data.frame(result)) + expect_true(all(is.finite(result$estimate))) + expect_no_error( + sw(coev_ancestral_states(m, scale = "response")) + ) + } +}) diff --git a/tests/testthat/test-coev_identify_nodes.R b/tests/testthat/test-coev_identify_nodes.R new file mode 100644 index 0000000..a0a3f43 --- /dev/null +++ b/tests/testthat/test-coev_identify_nodes.R @@ -0,0 +1,28 @@ +test_that("coev_identify_nodes() validates inputs", { + expect_error( + coev_identify_nodes(tree = "fail"), + "Argument 'tree' must be a 'phylo' object.", + fixed = TRUE + ) + tr <- ape::rcoal(5) + expect_error( + coev_identify_nodes(tree = tr, n = "fail"), + "Argument 'n' must be a single positive integer.", + fixed = TRUE + ) + expect_error( + coev_identify_nodes(tree = tr, n = 0), + "Argument 'n' must be a single positive integer.", + fixed = TRUE + ) + expect_error( + coev_identify_nodes(tree = tr, n = c(1, 2)), + "Argument 'n' must be a single positive integer.", + fixed = TRUE + ) + expect_error( + coev_identify_nodes(tree = tr, n = 1.5), + "Argument 'n' must be a single positive integer.", + fixed = TRUE + ) +}) diff --git a/tests/testthat/test-coev_plot_node_labels.R b/tests/testthat/test-coev_plot_node_labels.R new file mode 100644 index 0000000..728fc56 --- /dev/null +++ b/tests/testthat/test-coev_plot_node_labels.R @@ -0,0 +1,47 @@ +test_that("coev_plot_node_labels() produces expected errors and output", { + m01 <- readRDS(test_path("fixtures", "coevfit_example_01.rds")) + m01 <- reload_fit(m01, filename = "coevfit_example_01-1.csv") + # expect errors for invalid inputs + expect_error( + coev_plot_node_labels(object = "fail"), + "Argument 'object' must be a fitted coevolutionary model of class coevfit.", + fixed = TRUE + ) + expect_error( + coev_plot_node_labels(object = m01, tip_labels = "fail"), + "Argument 'tip_labels' must be TRUE or FALSE.", + fixed = TRUE + ) + expect_error( + coev_plot_node_labels(object = m01, node_cex = "fail"), + "Argument 'node_cex' must be a single positive number.", + fixed = TRUE + ) + expect_error( + coev_plot_node_labels(object = m01, node_cex = -1), + "Argument 'node_cex' must be a single positive number.", + fixed = TRUE + ) + expect_error( + coev_plot_node_labels(object = m01, node_cex = c(0.5, 0.6)), + "Argument 'node_cex' must be a single positive number.", + fixed = TRUE + ) + # should run without error and return phylo invisibly + expect_no_error(result <- coev_plot_node_labels(m01)) + expect_true(inherits(result, "phylo")) + # tip_labels = TRUE should also work + expect_no_error(coev_plot_node_labels(m01, tip_labels = TRUE)) + # custom node_cex and node_col + expect_no_error( + coev_plot_node_labels(m01, node_cex = 0.4, node_col = "blue") + ) +}) + +test_that("coev_plot_node_labels() handles multiPhylo trees", { + m08 <- readRDS(test_path("fixtures", "coevfit_example_08.rds")) + m08 <- reload_fit(m08, filename = "coevfit_example_08-1.csv") + expect_true(inherits(m08$tree, "multiPhylo")) + expect_no_error(result <- coev_plot_node_labels(m08)) + expect_true(inherits(result, "phylo")) +}) diff --git a/vignettes/ancestral_states.Rmd b/vignettes/ancestral_states.Rmd new file mode 100644 index 0000000..d37294d --- /dev/null +++ b/vignettes/ancestral_states.Rmd @@ -0,0 +1,429 @@ +--- +title: "Ancestral State Reconstruction with coevolve" +output: + rmarkdown::html_vignette: + toc: true +pkgdown: + as_is: true +date: "2026-04-27" +author: "Erik Ringen" +vignette: > + %\VignetteIndexEntry{Ancestral State Reconstruction with coevolve} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +## Overview + +The `coev_ancestral_states()` function extracts posterior estimates of latent +trait values at internal (ancestral) nodes of the phylogeny from a fitted +`coevfit` model. Because the model estimates latent states at every node in the +tree, ancestral state reconstruction is a natural byproduct of model fitting -- +no separate analysis is needed. + +This vignette demonstrates the function using the **authority** dataset bundled +with the package: political and religious authority among 97 Austronesian +societies. + +## Setup + +``` r +library(coevolve) +library(ggplot2) +library(dplyr) +library(tidyr) +library(ape) +theme_set(theme_minimal(base_size = 13)) +``` + +## Fit the model + +We fit a coevolutionary model with two ordered-logistic variables. For +demonstration purposes we use moderate sampling settings; in practice you would +want more iterations and to check convergence carefully. + +``` r +fit <- coev_fit( + data = authority$data, + variables = list( + political_authority = "ordered_logistic", + religious_authority = "ordered_logistic" + ), + id = "language", + tree = authority$phylogeny, + parallel_chains = 4, + chains = 4, + iter_sampling = 500, + iter_warmup = 500, + seed = 42 +) +``` + +``` +Running MCMC with 4 parallel chains... + +Chain 4 finished in 84.6 seconds. +Chain 3 finished in 114.0 seconds. +Chain 1 finished in 115.0 seconds. +Chain 2 finished in 119.0 seconds. + +All 4 chains finished successfully. +Mean chain execution time: 108.1 seconds. +Total execution time: 119.2 seconds. +``` + +Quick check that the model sampled adequately: + +``` r +summary(fit) +``` + +``` +Variables: political_authority = ordered_logistic + religious_authority = ordered_logistic + Data: authority$data (Number of observations: 97) +Phylogeny: authority$phylogeny (Number of trees: 1) + Draws: 4 chains, each with iter = 500; warmup = 500; thin = 1 + total post-warmup draws = 2000 + +Autoregressive selection effects: + Estimate Est.Error 2.5% 97.5% Rhat Bulk_ESS Tail_ESS +political_authority -0.53 0.44 -1.64 -0.02 1.00 1420 1270 +religious_authority -0.60 0.48 -1.82 -0.03 1.00 1549 1137 + +Cross selection effects: + Estimate Est.Error 2.5% 97.5% Rhat +political_authority ⟶ religious_authority 1.50 0.74 -0.04 2.87 1.00 +religious_authority ⟶ political_authority 1.05 0.83 -0.62 2.64 1.00 + Bulk_ESS Tail_ESS +political_authority ⟶ religious_authority 842 1055 +religious_authority ⟶ political_authority 701 1384 + +Drift parameters: + Estimate Est.Error 2.5% 97.5% +sd(political_authority) 2.15 0.82 0.37 3.70 +sd(religious_authority) 1.46 0.82 0.13 3.10 +cor(political_authority,religious_authority) 0.37 0.30 -0.35 0.85 + Rhat Bulk_ESS Tail_ESS +sd(political_authority) 1.00 453 406 +sd(religious_authority) 1.01 416 887 +cor(political_authority,religious_authority) 1.00 1116 1360 + +Continuous time intercept parameters: + Estimate Est.Error 2.5% 97.5% Rhat Bulk_ESS Tail_ESS +political_authority 0.31 0.91 -1.55 2.12 1.00 2426 1337 +religious_authority 0.31 0.91 -1.47 2.14 1.01 3135 1612 + +Ordinal cutpoint parameters: + Estimate Est.Error 2.5% 97.5% Rhat Bulk_ESS Tail_ESS +political_authority[1] -1.44 0.88 -3.13 0.23 1.00 1266 1010 +political_authority[2] -0.70 0.86 -2.36 1.01 1.00 1418 1276 +political_authority[3] 1.48 0.86 -0.20 3.19 1.00 1661 1415 +religious_authority[1] -1.59 0.92 -3.32 0.24 1.00 1424 1248 +religious_authority[2] -0.92 0.90 -2.61 0.87 1.00 1699 1531 +religious_authority[3] 1.45 0.92 -0.24 3.28 1.00 1913 1488 +``` + +``` +Warning: There were 2 divergent transitions after warmup. +http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup +``` + +## Ancestral states on the latent scale + +The default call returns posterior summaries (median + 95% credible interval) for +all internal nodes on the latent (eta) scale: + +``` r +asr_latent <- coev_ancestral_states(fit) +head(asr_latent) +``` + +``` +# A tibble: 6 x 6 + node variable category estimate lower upper + +1 98 political_authority 0.377 -1.44 2.15 +2 98 religious_authority 0.292 -1.52 1.92 +3 99 political_authority 0.363 -1.39 2.20 +4 99 religious_authority 0.282 -1.44 1.87 +5 100 political_authority 0.235 -1.94 2.44 +6 100 religious_authority 0.276 -1.61 2.20 +``` + +The result is a long-format tibble with one row per node-variable combination +(or one row per node-variable-category for ordinal-response output below). The +`category` column is `NA` on the latent scale and for non-ordinal variables; it +holds category labels for ordinal-response output. The `node` column uses ape's +numbering convention (tips are `1:N_tips`, internal nodes are +`(N_tips+1):(2*N_tips-1)`), so it integrates directly with phylo objects. + +### Visualizing latent ancestral states on the tree + +We can paint the tree with estimated ancestral values by mapping the posterior +median to a diverging color gradient. Blue indicates low latent values; red +indicates high: + +``` r +tree <- attr(asr_latent, "ref_tree") +n_tips <- length(tree$tip.label) +asr_all <- coev_ancestral_states(fit, nodes = "all") + +pal <- colorRampPalette(c("#2166AC", "#F7F7F7", "#B2182B"))(100) +rng <- range(asr_all$estimate) + +# helper: map values onto the palette +to_color <- function(x) { + pal[pmax(1, pmin(100, findInterval(x, seq(rng[1], rng[2], length.out = 101))))] +} + +par(mfrow = c(1, 2), mar = c(2, 0, 2, 0)) + +for (var in unique(asr_all$variable)) { + vals <- setNames(asr_all$estimate[asr_all$variable == var], + asr_all$node[asr_all$variable == var]) + plot(tree, show.tip.label = FALSE, edge.width = 1.5, + edge.color = "grey70", main = gsub("_", " ", var)) + internal_ids <- (n_tips + 1):(n_tips + tree$Nnode) + nodelabels(pch = 21, bg = to_color(vals[as.character(internal_ids)]), + col = NA, cex = 1.2) +} + +# shared color bar +par(fig = c(0.35, 0.65, 0, 0.05), new = TRUE, mar = rep(0, 4)) +image(seq(rng[1], rng[2], length.out = 100), 1, + matrix(1:100), col = pal, axes = FALSE, xlab = "", ylab = "") +axis(1, cex.axis = 0.7, padj = -1.5, tck = -0.3) +mtext("Latent eta", side = 1, line = 1.2, cex = 0.7) +``` + +
+plot of chunk asr-latent-tree +
+ +### Joint latent-space scatterplot + +We can also look at the joint distribution of ancestral states across the two +variables. Each point is an internal node, with crosshairs showing the 95% +credible interval: + +``` r +asr_wide <- asr_latent |> + select(node, variable, estimate, lower, upper) |> + pivot_wider( + names_from = variable, + values_from = c(estimate, lower, upper) + ) + +ggplot(asr_wide, + aes(x = estimate_political_authority, + y = estimate_religious_authority)) + + geom_errorbar( + aes(ymin = lower_religious_authority, + ymax = upper_religious_authority), + alpha = 0.2, width = 0 + ) + + geom_errorbarh( + aes(xmin = lower_political_authority, + xmax = upper_political_authority), + alpha = 0.2, height = 0 + ) + + geom_point(color = "#B2182B", size = 2) + + labs( + x = "Political authority (latent)", + y = "Religious authority (latent)", + title = "Ancestral states at internal nodes" + ) +``` + +
+plot of chunk asr-latent-scatter +
+ +## Ancestral states on the response scale + +For ordered-logistic variables, the response scale returns estimated category +probabilities at each node -- how likely each level of authority (absent, +sublocal, local, supralocal) was at that ancestral node. Each ordinal variable +expands to one row per category, with `category` populated: + +``` r +asr_resp <- coev_ancestral_states( + fit, nodes = "all", scale = "response" +) +head(asr_resp, 8) +``` + +``` +# A tibble: 8 x 6 + node variable category estimate lower upper + +1 1 political_authority cat_1 0.0442 0.00410 0.143 +2 1 political_authority cat_2 0.0419 0.00498 0.123 +3 1 political_authority cat_3 0.346 0.131 0.555 +4 1 political_authority cat_4 0.568 0.330 0.798 +5 1 religious_authority cat_1 0.0348 0.00270 0.122 +6 1 religious_authority cat_2 0.0284 0.00298 0.092 +7 1 religious_authority cat_3 0.340 0.123 0.566 +8 1 religious_authority cat_4 0.597 0.355 0.836 +``` + +The point estimate (`estimate`) for ordinal-response rows is the posterior +\emph{mean}, so the per-node category estimates sum exactly to 1 (matching the +convention of `predict.brmsfit`). The `lower` and `upper` columns are +per-category posterior quantiles and won't generally sum to 1. + +### Pie chart tree: response-scale probabilities + +This is the most intuitive way to visualize ordinal ancestral states. Each pie +chart at an internal node shows the posterior median probability of each +category. At the tips, observed values are shown as solid colors (one category +with probability 1). If a tip has missing data, the model-estimated +probabilities are shown instead. + +``` r +cat_labels <- c("Absent", "Sublocal", "Local", "Supralocal") +cat_colors <- c("#FFFFB2", "#FECC5C", "#FD8D3C", "#E31A1C") +n_cats <- length(cat_labels) + +# pivot ordinal-response long format -> per-node probability matrix +to_pie_mat <- function(df) { + wide <- df |> + select(node, category, estimate) |> + pivot_wider(names_from = category, values_from = estimate) |> + arrange(node) + as.matrix(wide[, paste0("cat_", seq_len(n_cats))]) +} + +par(mfrow = c(1, 2), mar = c(1, 0, 3, 0)) + +for (var in unique(asr_resp$variable)) { + df_var <- asr_resp |> filter(variable == var) + plot(tree, show.tip.label = FALSE, edge.width = 1.5, + edge.color = "grey50", main = gsub("_", " ", var)) + # internal nodes: model-estimated category probabilities + nodelabels(pie = to_pie_mat(filter(df_var, node > n_tips)), + piecol = cat_colors, cex = 0.6) + # tips: observed value (deterministic) when present, otherwise model estimate + tip_pie <- to_pie_mat(filter(df_var, node <= n_tips)) + obs <- authority$data[[var]][match(tree$tip.label, authority$data$language)] + observed <- !is.na(obs) + tip_pie[observed, ] <- 0 + tip_pie[cbind(which(observed), as.integer(obs[observed]))] <- 1 + tiplabels(pie = tip_pie, piecol = cat_colors, cex = 0.4) +} + +# shared legend +par(fig = c(0.3, 0.7, 0, 0.05), new = TRUE, mar = rep(0, 4)) +plot.new() +legend("center", legend = cat_labels, fill = cat_colors, + horiz = TRUE, bty = "n", cex = 0.9, title = "Authority level") +``` + +
+plot of chunk asr-pie-tree +
+ +## Working with raw posterior draws + +Setting `summary = FALSE` returns the full posterior array, which is useful for +custom summaries or propagating uncertainty: + +``` r +asr_draws <- coev_ancestral_states(fit, summary = FALSE) +str(asr_draws, max.level = 1) +``` + +``` +List of 4 + $ draws : num [1:2000, 1:96, 1:2] 0.914 0.294 1.224 0.221 0.319 ... + $ ref_tree :List of 5 + $ node_ids : int [1:96] 98 99 100 101 102 103 104 105 106 107 ... + $ variable_names: chr [1:2] "political_authority" "religious_authority" +``` + +The `draws` element is a 3D array with dimensions `[draws, nodes, variables]`. +Here's an example: the posterior density of latent values at the root node for +both variables: + +``` r +root_node_idx <- which(asr_draws$node_ids == n_tips + 1) + +root_df <- data.frame( + political_authority = asr_draws$draws[, root_node_idx, 1], + religious_authority = asr_draws$draws[, root_node_idx, 2] +) |> + pivot_longer(everything(), names_to = "variable", values_to = "eta") + +ggplot(root_df, aes(x = eta, fill = variable)) + + geom_density(alpha = 0.5) + + scale_fill_manual( + values = c(political_authority = "#2166AC", + religious_authority = "#B2182B"), + labels = c("Political", "Religious"), + name = "Authority" + ) + + labs( + x = "Latent value (eta)", + y = "Density", + title = "Posterior distribution at the root node" + ) +``` + +
+plot of chunk asr-root-density +
+ +## Identifying nodes on the tree + +To query ancestral states for specific nodes, you first need to know their IDs. +The `coev_plot_node_labels()` function plots the tree with internal node IDs +displayed in red: + +``` r +tree <- coev_plot_node_labels(fit) +``` + +
+plot of chunk asr-node-labels +
+ +Node numbering follows ape's convention: tips are `1:N_tips` and internal nodes +are `(N_tips + 1):(2 * N_tips - 1)`. + +### Interactive node selection + +In an interactive R session you can click directly on the tree to identify +nodes. `coev_identify_nodes()` wraps `ape::identify.phylo()` in a loop so that +multiple clicks accumulate; the loop ends when you click outside the tree (or +press Esc, depending on your graphics device): + +``` r +tree <- coev_plot_node_labels(fit) +nodes <- coev_identify_nodes(tree) +coev_ancestral_states(fit, nodes = nodes) +``` + +## Selecting specific nodes and variables + +Once you know the node IDs, you can target specific nodes or variables to reduce +computation and focus the output: + +``` r +# only religious authority, at three specific internal nodes +asr_sub <- coev_ancestral_states( + fit, + variables = "religious_authority", + nodes = c(n_tips + 1, n_tips + 2, n_tips + 3) +) +asr_sub +``` + +``` +# A tibble: 3 x 6 + node variable category estimate lower upper + +1 98 religious_authority 0.292 -1.52 1.92 +2 99 religious_authority 0.282 -1.44 1.87 +3 100 religious_authority 0.276 -1.61 2.20 +``` diff --git a/vignettes/ancestral_states.Rmd.orig b/vignettes/ancestral_states.Rmd.orig new file mode 100644 index 0000000..17ca79c --- /dev/null +++ b/vignettes/ancestral_states.Rmd.orig @@ -0,0 +1,315 @@ +--- +title: "Ancestral State Reconstruction with coevolve" +output: + rmarkdown::html_vignette: + toc: true +pkgdown: + as_is: true +date: "`r Sys.Date()`" +author: "Erik Ringen" +vignette: > + %\VignetteIndexEntry{Ancestral State Reconstruction with coevolve} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +options(width = 120) +knitr::opts_chunk$set( + comment = NA, + fig.path = "vignettes/", + dev = "png", + dpi = 150, + fig.asp = 0.8, + out.width = "60%", + fig.align = "center" +) +``` + +## Overview + +The `coev_ancestral_states()` function extracts posterior estimates of latent +trait values at internal (ancestral) nodes of the phylogeny from a fitted +`coevfit` model. Because the model estimates latent states at every node in the +tree, ancestral state reconstruction is a natural byproduct of model fitting -- +no separate analysis is needed. + +This vignette demonstrates the function using the **authority** dataset bundled +with the package: political and religious authority among 97 Austronesian +societies. + +## Setup + +```{r libraries, warning=FALSE, message=FALSE} +library(coevolve) +library(ggplot2) +library(dplyr) +library(tidyr) +library(ape) +theme_set(theme_minimal(base_size = 13)) +``` + +## Fit the model + +We fit a coevolutionary model with two ordered-logistic variables. For +demonstration purposes we use moderate sampling settings; in practice you would +want more iterations and to check convergence carefully. + +```{r asr-fit, warning=FALSE, message=FALSE} +fit <- coev_fit( + data = authority$data, + variables = list( + political_authority = "ordered_logistic", + religious_authority = "ordered_logistic" + ), + id = "language", + tree = authority$phylogeny, + parallel_chains = 4, + chains = 4, + iter_sampling = 500, + iter_warmup = 500, + seed = 42 +) +``` + +Quick check that the model sampled adequately: + +```{r asr-summary} +summary(fit) +``` + +## Ancestral states on the latent scale + +The default call returns posterior summaries (median + 95% credible interval) for +all internal nodes on the latent (eta) scale: + +```{r asr-latent} +asr_latent <- coev_ancestral_states(fit) +head(asr_latent) +``` + +The result is a long-format tibble with one row per node-variable combination +(or one row per node-variable-category for ordinal-response output below). The +`category` column is `NA` on the latent scale and for non-ordinal variables; it +holds category labels for ordinal-response output. The `node` column uses ape's +numbering convention (tips are `1:N_tips`, internal nodes are +`(N_tips+1):(2*N_tips-1)`), so it integrates directly with phylo objects. + +### Visualizing latent ancestral states on the tree + +We can paint the tree with estimated ancestral values by mapping the posterior +median to a diverging color gradient. Blue indicates low latent values; red +indicates high: + +```{r asr-latent-tree, fig.asp=0.55, out.width="80%"} +tree <- attr(asr_latent, "ref_tree") +n_tips <- length(tree$tip.label) +asr_all <- coev_ancestral_states(fit, nodes = "all") + +pal <- colorRampPalette(c("#2166AC", "#F7F7F7", "#B2182B"))(100) +rng <- range(asr_all$estimate) + +# helper: map values onto the palette +to_color <- function(x) { + pal[pmax(1, pmin(100, findInterval(x, seq(rng[1], rng[2], length.out = 101))))] +} + +par(mfrow = c(1, 2), mar = c(2, 0, 2, 0)) + +for (var in unique(asr_all$variable)) { + vals <- setNames(asr_all$estimate[asr_all$variable == var], + asr_all$node[asr_all$variable == var]) + plot(tree, show.tip.label = FALSE, edge.width = 1.5, + edge.color = "grey70", main = gsub("_", " ", var)) + internal_ids <- (n_tips + 1):(n_tips + tree$Nnode) + nodelabels(pch = 21, bg = to_color(vals[as.character(internal_ids)]), + col = NA, cex = 1.2) +} + +# shared color bar +par(fig = c(0.35, 0.65, 0, 0.05), new = TRUE, mar = rep(0, 4)) +image(seq(rng[1], rng[2], length.out = 100), 1, + matrix(1:100), col = pal, axes = FALSE, xlab = "", ylab = "") +axis(1, cex.axis = 0.7, padj = -1.5, tck = -0.3) +mtext("Latent eta", side = 1, line = 1.2, cex = 0.7) +``` + +### Joint latent-space scatterplot + +We can also look at the joint distribution of ancestral states across the two +variables. Each point is an internal node, with crosshairs showing the 95% +credible interval: + +```{r asr-latent-scatter} +asr_wide <- asr_latent |> + select(node, variable, estimate, lower, upper) |> + pivot_wider( + names_from = variable, + values_from = c(estimate, lower, upper) + ) + +ggplot(asr_wide, + aes(x = estimate_political_authority, + y = estimate_religious_authority)) + + geom_errorbar( + aes(ymin = lower_religious_authority, + ymax = upper_religious_authority), + alpha = 0.2, width = 0 + ) + + geom_errorbarh( + aes(xmin = lower_political_authority, + xmax = upper_political_authority), + alpha = 0.2, height = 0 + ) + + geom_point(color = "#B2182B", size = 2) + + labs( + x = "Political authority (latent)", + y = "Religious authority (latent)", + title = "Ancestral states at internal nodes" + ) +``` + +## Ancestral states on the response scale + +For ordered-logistic variables, the response scale returns estimated category +probabilities at each node -- how likely each level of authority (absent, +sublocal, local, supralocal) was at that ancestral node. Each ordinal variable +expands to one row per category, with `category` populated: + +```{r asr-response} +asr_resp <- coev_ancestral_states( + fit, nodes = "all", scale = "response" +) +head(asr_resp, 8) +``` + +The point estimate (`estimate`) for ordinal-response rows is the posterior +mean, so the per-node category estimates sum exactly to 1 (matching the +convention of `predict.brmsfit`). The `lower` and `upper` columns are +per-category posterior quantiles and won't generally sum to 1. + +### Pie chart tree: response-scale probabilities + +This is the most intuitive way to visualize ordinal ancestral states. Each pie +chart at an internal node shows the posterior mean probability of each +category. At the tips, observed values are shown as solid colors (one category +with probability 1). If a tip has missing data, the model-estimated +probabilities are shown instead. + +```{r asr-pie-tree, fig.asp=0.55, out.width="80%"} +cat_labels <- c("Absent", "Sublocal", "Local", "Supralocal") +cat_colors <- c("#FFFFB2", "#FECC5C", "#FD8D3C", "#E31A1C") +n_cats <- length(cat_labels) + +# pivot ordinal-response long format -> per-node probability matrix +to_pie_mat <- function(df) { + wide <- df |> + select(node, category, estimate) |> + pivot_wider(names_from = category, values_from = estimate) |> + arrange(node) + as.matrix(wide[, paste0("cat_", seq_len(n_cats))]) +} + +par(mfrow = c(1, 2), mar = c(1, 0, 3, 0)) + +for (var in unique(asr_resp$variable)) { + df_var <- asr_resp |> filter(variable == var) + plot(tree, show.tip.label = FALSE, edge.width = 1.5, + edge.color = "grey50", main = gsub("_", " ", var)) + # internal nodes: model-estimated category probabilities + nodelabels(pie = to_pie_mat(filter(df_var, node > n_tips)), + piecol = cat_colors, cex = 0.6) + # tips: observed value (deterministic) when present, otherwise model estimate + tip_pie <- to_pie_mat(filter(df_var, node <= n_tips)) + obs <- authority$data[[var]][match(tree$tip.label, authority$data$language)] + observed <- !is.na(obs) + tip_pie[observed, ] <- 0 + tip_pie[cbind(which(observed), as.integer(obs[observed]))] <- 1 + tiplabels(pie = tip_pie, piecol = cat_colors, cex = 0.4) +} + +# shared legend +par(fig = c(0.3, 0.7, 0, 0.05), new = TRUE, mar = rep(0, 4)) +plot.new() +legend("center", legend = cat_labels, fill = cat_colors, + horiz = TRUE, bty = "n", cex = 0.9, title = "Authority level") +``` + +## Working with raw posterior draws + +Setting `summary = FALSE` returns the full posterior array, which is useful for +custom summaries or propagating uncertainty: + +```{r asr-draws-str} +asr_draws <- coev_ancestral_states(fit, summary = FALSE) +str(asr_draws, max.level = 1) +``` + +The `draws` element is a 3D array with dimensions `[draws, nodes, variables]`. +Here's an example: the posterior density of latent values at the root node for +both variables: + +```{r asr-root-density} +root_node_idx <- which(asr_draws$node_ids == n_tips + 1) + +root_df <- data.frame( + political_authority = asr_draws$draws[, root_node_idx, 1], + religious_authority = asr_draws$draws[, root_node_idx, 2] +) |> + pivot_longer(everything(), names_to = "variable", values_to = "eta") + +ggplot(root_df, aes(x = eta, fill = variable)) + + geom_density(alpha = 0.5) + + scale_fill_manual( + values = c(political_authority = "#2166AC", + religious_authority = "#B2182B"), + labels = c("Political", "Religious"), + name = "Authority" + ) + + labs( + x = "Latent value (eta)", + y = "Density", + title = "Posterior distribution at the root node" + ) +``` + +## Identifying nodes on the tree + +To query ancestral states for specific nodes, you first need to know their IDs. +The `coev_plot_node_labels()` function plots the tree with internal node IDs +displayed in red: + +```{r asr-node-labels, fig.asp=1, out.width="60%"} +tree <- coev_plot_node_labels(fit) +``` + +Node numbering follows ape's convention: tips are `1:N_tips` and internal nodes +are `(N_tips + 1):(2 * N_tips - 1)`. + +### Interactive node selection + +In an interactive R session you can click directly on the tree to identify +nodes. `coev_identify_nodes()` wraps `ape::identify.phylo()` in a loop so that +multiple clicks accumulate; the loop ends when you click outside the tree (or +press Esc, depending on your graphics device): + +``` r +tree <- coev_plot_node_labels(fit) +nodes <- coev_identify_nodes(tree) +coev_ancestral_states(fit, nodes = nodes) +``` + +## Selecting specific nodes and variables + +Once you know the node IDs, you can target specific nodes or variables to reduce +computation and focus the output: + +```{r asr-subset} +# only religious authority, at three specific internal nodes +asr_sub <- coev_ancestral_states( + fit, + variables = "religious_authority", + nodes = c(n_tips + 1, n_tips + 2, n_tips + 3) +) +asr_sub +``` diff --git a/vignettes/asr-latent-scatter-1.png b/vignettes/asr-latent-scatter-1.png new file mode 100644 index 0000000..239f20c Binary files /dev/null and b/vignettes/asr-latent-scatter-1.png differ diff --git a/vignettes/asr-latent-tree-1.png b/vignettes/asr-latent-tree-1.png new file mode 100644 index 0000000..618996e Binary files /dev/null and b/vignettes/asr-latent-tree-1.png differ diff --git a/vignettes/asr-node-labels-1.png b/vignettes/asr-node-labels-1.png new file mode 100644 index 0000000..a12952a Binary files /dev/null and b/vignettes/asr-node-labels-1.png differ diff --git a/vignettes/asr-pie-tree-1.png b/vignettes/asr-pie-tree-1.png new file mode 100644 index 0000000..1a9e6d2 Binary files /dev/null and b/vignettes/asr-pie-tree-1.png differ diff --git a/vignettes/asr-root-density-1.png b/vignettes/asr-root-density-1.png new file mode 100644 index 0000000..4e21cdb Binary files /dev/null and b/vignettes/asr-root-density-1.png differ diff --git a/vignettes/precompile.R b/vignettes/precompile.R index 26b7e2f..3f5f1de 100644 --- a/vignettes/precompile.R +++ b/vignettes/precompile.R @@ -2,3 +2,7 @@ # Need to change figure paths in Rmd files afterwards (remove "vignettes/") # see: https://ropensci.org/blog/2019/12/08/precompute-vignettes/ knitr::knit("vignettes/coevolve.Rmd.orig", "vignettes/coevolve.Rmd") +knitr::knit( + "vignettes/ancestral_states.Rmd.orig", + "vignettes/ancestral_states.Rmd" +)