diff --git a/.Rbuildignore b/.Rbuildignore index d8d42c09..19a02a58 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -20,4 +20,4 @@ CITATION.cff$ ^docs$ ^pkgdown$ ^kinship2 - Shortcut - +\.X$ diff --git a/.gitignore b/.gitignore index 1138d94f..acb3bdec 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,5 @@ *.lnk docs *.code-workspace +R/plot.ped.R +R/plot.ped.X diff --git a/DESCRIPTION b/DESCRIPTION index 9028ee0f..e6a0024a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,6 +20,7 @@ Imports: ggrepel, rlang, dplyr, + stringr, utils Suggests: EasyMx, @@ -30,6 +31,7 @@ Suggests: OpenMx, NlsyLinks, rmarkdown, + tibble, testthat (>= 3.0.0) VignetteBuilder: knitr diff --git a/R/calcConnections.R b/R/calcConnections.R new file mode 100644 index 00000000..eaf01859 --- /dev/null +++ b/R/calcConnections.R @@ -0,0 +1,348 @@ +#' Calculate connections for a pedigree dataset +#' +#' Computes graphical connection paths for a pedigree layout, including parent-child, +#' sibling, and spousal connections. Optionally processes duplicate appearances +#' of individuals (marked as `extra`) to ensure relational accuracy. +#' +#' @inheritParams ggpedigree +#' @param config List of configuration parameters. Currently unused but passed through to internal helpers. +#' @return A `data.frame` containing connection points and midpoints for graphical rendering. Includes: +#' \itemize{ +#' \item `x_pos`, `y_pos`: positions of focal individual +#' \item `x_dad`, `y_dad`, `x_mom`, `y_mom`: parental positions (if available) +#' \item `x_spouse`, `y_spouse`: spousal positions (if available) +#' \item `x_midparent`, `y_midparent`: midpoint between parents +#' \item `x_mid_sib`, `y_mid_sib`: sibling group midpoint +#' \item `x_mid_spouse`, `y_mid_spouse`: midpoint between spouses +#' } +#' +#' @export + +calculateConnections <- function(ped, + config = list()) { + # Check inputs ----------------------------------------------------------- + if (!inherits(ped, "data.frame")) { + stop("ped should be a data.frame or inherit to a data.frame") + } + if (!all(c("personID", "x_pos", "y_pos", "dadID", "momID") %in% names(ped))) { + stop("ped must contain personID, x_pos, y_pos, dadID, and momID columns") + } + + # Default configuration placeholder + default_config <- list() + config <- utils::modifyList(default_config, config) + + + # Add spouseID if missing + if (!all("spouseID" %in% names(ped))) { + ped$spouseID <- NA + # Attempt to infer spouse based on parenthood (not always reliable) + # this will give you the mom that is the spouse of the dad + # ped$spouseID <- ped$momID[match(ped$personID, ped$dadID)] + # this will give you the dad that is the spouse of the mom + # ped$spouseID <- ped$dadID[match(ped$personID, ped$momID)] + + ped$spouseID <- ifelse(!is.na(ped$momID[match(ped$personID, ped$dadID)]), + ped$momID[match(ped$personID, ped$dadID)], + ped$dadID[match(ped$personID, ped$momID)] + ) + } + # Add famID if missing (used for grouping) + if (!all("famID" %in% names(ped))) { + ped$famID <- 1 + } + + # create a unique parent_hash for each individual + # this will be used to identify siblings + if (!all("parent_hash" %in% names(ped))) { + ped <- ped |> + dplyr::mutate( + parent_hash = symKey(.data$momID, .data$dadID), + couple_hash = symKey(.data$personID, .data$spouseID) + ) |> + dplyr::mutate( + parent_hash = gsub("NA.NA", NA, .data$parent_hash), + couple_hash = gsub("NA.NA", NA, .data$couple_hash) + ) + } + + # If duplicated appearances exist, resolve which connections to keep + if (sum(ped$extra) > 0) { + full_extra <- processExtras(ped, config = config) + + ped <- full_extra$ped |> unique() + } else { + ped <- ped |> + dplyr::mutate( + coreID = .data$personID + ) + } + connections <- dplyr::select( + .data = ped, + "personID", + "x_pos", "y_pos", + "dadID", "momID", "parent_hash", "couple_hash", + "spouseID", + "famID", + "extra" + ) |> unique() + + # no duplications, so just use the same connections + connections_for_sibs <- connections_for_spouses <- connections_for_dads <- connections_for_moms <- connections + + + connections <- connections |> + dplyr::mutate( + link_as_mom = TRUE, + link_as_dad = TRUE, + link_as_spouse = TRUE, + link_as_sibling = TRUE + ) + + + + + + # Get mom's coordinates + mom_connections <- getRelativeCoordinates( + ped = ped, + connections = connections_for_moms, + relativeIDvar = "momID", + x_name = "x_mom", + y_name = "y_mom" + ) + + # Get dad's coordinates + dad_connections <- getRelativeCoordinates( + ped = ped, + connections = connections_for_dads, + relativeIDvar = "dadID", + x_name = "x_dad", + y_name = "y_dad" + ) + + # Get spouse coordinates + spouse_connections <- ped |> + dplyr::select( + "personID", "x_pos", + "y_pos", "spouseID", "couple_hash" + ) |> + dplyr::left_join(connections_for_spouses, + by = c("spouseID" = "personID"), + suffix = c("", "_spouse"), + multiple = "all" + ) |> + dplyr::rename( + x_spouse = "x_pos_spouse", + y_spouse = "y_pos_spouse" + ) |> + dplyr::select( + "personID", "spouseID", + "x_spouse", "y_spouse", "couple_hash" + ) |> + unique() + + # Combine mom, dad, and spouse coordinates + connections <- connections |> + dplyr::left_join(mom_connections, + by = c("personID", "momID") + ) |> + dplyr::left_join(dad_connections, + by = c("personID", "dadID") + ) |> + dplyr::left_join(spouse_connections, + by = c("personID", "spouseID", "couple_hash") + ) |> + unique() + + # Calculate midpoints between mom and dad in child row + + parent_midpoints <- connections |> + dplyr::filter(.data$link_as_sibling & + !is.na(.data$dadID) & !is.na(.data$momID)) |> + dplyr::group_by(.data$parent_hash) |> + dplyr::summarize( + x_midparent = mean(c( + dplyr::first(.data$x_dad), + dplyr::first(.data$x_mom) + )), + y_midparent = mean(c( + dplyr::first(.data$y_dad), + dplyr::first(.data$y_mom) + )), + .groups = "drop" + ) |> + unique() + + # Calculate midpoints between spouses + spouse_midpoints <- connections |> + dplyr::filter( + .data$link_as_spouse, + !is.na(.data$spouseID) + ) |> + dplyr::group_by(.data$spouseID, .data$couple_hash) |> + dplyr::summarize( + x_mid_spouse = mean(c( + dplyr::first(.data$x_pos), + dplyr::first(.data$x_spouse) + )), + y_mid_spouse = mean(c( + dplyr::first(.data$y_pos), + dplyr::first(.data$y_spouse) + )), + .groups = "drop" + ) |> + unique() + + # Calculate sibling group midpoints + sibling_midpoints <- connections |> + dplyr::filter( + .data$link_as_sibling, + !is.na(.data$momID) & !is.na(.data$dadID) & # biological parents defined + !is.na(.data$x_mom) & !is.na(.data$y_mom) & # mom’s coordinates linked + !is.na(.data$x_dad) & !is.na(.data$y_dad) # dad’s coordinates linked + ) |> + dplyr::group_by( + .data$parent_hash, + .data$x_mom, .data$y_mom, + .data$x_dad, .data$y_dad + ) |> + dplyr::summarize( + x_mid_sib = mean(.data$x_pos), + y_mid_sib = dplyr::first(.data$y_pos), + .groups = "drop" + ) |> + unique() + + # print(parent_midpoints) + # Merge midpoints into connections + connections <- connections |> + dplyr::left_join(parent_midpoints, + by = c("parent_hash") + ) |> + dplyr::left_join(spouse_midpoints, + by = c("spouseID", "couple_hash") + ) |> + dplyr::left_join(sibling_midpoints, + by = c( + "parent_hash", "x_mom", "y_mom", + "x_dad", "y_dad" + ) + ) |> + dplyr::mutate( + x_mid_sib = dplyr::case_when( + is.na(.data$x_dad) & is.na(.data$x_mom) ~ NA_real_, + !is.na(.data$x_mid_sib) ~ .data$x_mid_sib, + (!is.na(.data$momID) & !is.na(.data$x_mom)) | (!is.na(.data$dadID) & !is.na(.data$x_dad)) ~ .data$x_pos, + TRUE ~ NA_real_ + ), + y_mid_sib = dplyr::case_when( + is.na(.data$y_dad) & is.na(.data$y_mom) ~ NA_real_, + !is.na(.data$y_mid_sib) ~ .data$y_mid_sib, + (!is.na(.data$momID) & !is.na(.data$y_mom)) | (!is.na(.data$dadID) & !is.na(.data$y_dad)) ~ .data$y_pos, + TRUE ~ NA_real_ + ) + ) |> + unique() |> + dplyr::mutate( + x_mid_sib = dplyr::if_else(.data$link_as_sibling, .data$x_mid_sib, NA_real_), + y_mid_sib = dplyr::if_else(.data$link_as_sibling, .data$y_mid_sib, NA_real_) + ) + + if (exists("full_extra")) { + plot_connections <- list( + connections = connections, + self_coords = full_extra$self_coords, + connections_spouse_segment = build_connections_spouse_segment( + ped = ped, + connections_for_FOO = connections_for_spouses + ) + ) + } else { + plot_connections <- list( + connections = connections, + self_coords = FALSE, + connections_spouse_segment = build_connections_spouse_segment( + ped = ped, + connections_for_FOO = connections_for_spouses + ) + ) + } + return(plot_connections) +} + + +build_connections_spouse_segment <- function(ped, connections_for_FOO, use_hash = TRUE) { + if (use_hash == TRUE) { + # I want to make segments for each hash, because some people have multiple spouses + # this is to add those missing segments + parent_hash_connections <- ped |> + dplyr::select("parent_hash") |> + dplyr::mutate( + parent1 = # needs to be the first part of the string + stringr::str_extract(.data$parent_hash, "^[^.]+"), + parent2 = # needs to be the second part of the string\ + stringr::str_extract(.data$parent_hash, "(?<=\\.)[^.]+") + ) |> + dplyr::left_join(connections_for_FOO |> + dplyr::mutate(personID = paste0(.data$personID)), + by = c("parent1" = "personID"), + suffix = c("", "_parent1"), + multiple = "any" + ) |> + dplyr::left_join(connections_for_FOO |> + dplyr::mutate(personID = paste0(.data$personID)), + by = c("parent2" = "personID"), + suffix = c("", "_parent2"), + multiple = "any" + ) |> + dplyr::mutate( + x_start = .data$x_pos, + x_end = .data$x_pos_parent2, + y_start = .data$y_pos, + y_end = .data$y_pos_parent2 + ) |> + dplyr::select( + -"parent_hash", + -"parent1", + -"parent2", + -"x_pos", + -"y_pos", + -"x_pos_parent2", + -"y_pos_parent2" + ) + + + + # Get spouse coordinates + } else { + # spouses + # Get spouse coordinates + spouse_connections <- ped |> + dplyr::select( + "personID", "x_pos", + "y_pos", "spouseID" + ) |> + dplyr::filter(!is.na(.data$spouseID)) |> + dplyr::left_join(connections_for_FOO, + by = c("spouseID" = "personID"), + suffix = c("", "_spouse"), + multiple = "any" + ) |> + dplyr::rename( + x_spouse = "x_pos_spouse", + y_spouse = "y_pos_spouse" + ) |> + unique() |> + dplyr::mutate( + x_start = .data$x_spouse, + x_end = .data$x_pos, + y_start = .data$y_spouse, + y_end = .data$y_pos + ) |> + dplyr::select( + -"spouseID_spouse" + ) + } + return(parent_hash_connections) +} diff --git a/R/calcConnections.X b/R/calcConnections.X new file mode 100644 index 00000000..a1e8eb48 --- /dev/null +++ b/R/calcConnections.X @@ -0,0 +1,329 @@ +#' Calculate connections for a pedigree dataset +#' +#' Computes graphical connection paths for a pedigree layout, including parent-child, +#' sibling, and spousal connections. Optionally processes duplicate appearances +#' of individuals (marked as `extra`) to ensure relational accuracy. +#' +#' @inheritParams ggpedigree +#' @param config List of configuration parameters. Currently unused but passed through to internal helpers. +#' @return A `data.frame` containing connection points and midpoints for graphical rendering. Includes: +#' \itemize{ +#' \item `x_pos`, `y_pos`: positions of focal individual +#' \item `x_dad`, `y_dad`, `x_mom`, `y_mom`: parental positions (if available) +#' \item `x_spouse`, `y_spouse`: spousal positions (if available) +#' \item `x_midparent`, `y_midparent`: midpoint between parents +#' \item `x_mid_sib`, `y_mid_sib`: sibling group midpoint +#' \item `x_mid_spouse`, `y_mid_spouse`: midpoint between spouses +#' } +#' +#' @export + +calculateConnections <- function(ped, + config = list()) { + # Check inputs ----------------------------------------------------------- + if (!inherits(ped, "data.frame")) { + stop("ped should be a data.frame or inherit to a data.frame") + } + if (!all(c("personID", "x_pos", "y_pos", "dadID", "momID") %in% names(ped))) { + stop("ped must contain personID, x_pos, y_pos, dadID, and momID columns") + } + + # Default configuration placeholder + default_config <- list() + config <- utils::modifyList(default_config, config) + + + # Add spouseID if missing + if (!all("spouseID" %in% names(ped))) { + ped$spouseID <- NA + # Attempt to infer spouse based on parenthood (not always reliable) + # this will give you the mom that is the spouse of the dad + # ped$spouseID <- ped$momID[match(ped$personID, ped$dadID)] + # this will give you the dad that is the spouse of the mom + # ped$spouseID <- ped$dadID[match(ped$personID, ped$momID)] + + ped$spouseID <- ifelse(!is.na(ped$momID[match(ped$personID, ped$dadID)]), + ped$momID[match(ped$personID, ped$dadID)], + ped$dadID[match(ped$personID, ped$momID)] + ) + } + # Add famID if missing (used for grouping) + if (!all("famID" %in% names(ped))) { + ped$famID <- 1 + } + + # create a unique parenthash for each individual + # this will be used to identify siblings + if (!all("parenthash" %in% names(ped))) { + ped <- ped |> + dplyr::mutate( + parenthash = paste0(.data$momID, ".", .data$dadID) + ) |> + dplyr::mutate( + parenthash = gsub("NA.NA", NA, .data$parenthash) + ) + } + + # If duplicated appearances exist, resolve which connections to keep + if (sum(ped$extra) > 0) { + ped <- processExtras(ped, config = config) + + # Construct base connection frame + # This will be used for all joins + connections_core <- dplyr::select( + .data = ped, + "personID", + "x_pos", "y_pos", + "dadID", "momID", "parenthash", + "spouseID", + "famID", + "x_otherself", "y_otherself", + "extra","link_as_mom", "link_as_dad", "link_as_spouse", + "link_as_sibling" + ) |> unique() + + + connections_for_moms <- dplyr::filter(connections_core, .data$link_as_mom == TRUE) |> + dplyr::select( + -"extra", + -"link_as_mom", + -"link_as_dad", + -"link_as_spouse", + -"link_as_sibling" + ) + + connections_for_dads <- dplyr::filter(connections_core, .data$link_as_dad == TRUE)|> + dplyr::select( + -"extra", + -"link_as_mom", + -"link_as_dad", + -"link_as_spouse", + -"link_as_sibling" + ) + connections_for_spouses <- dplyr::filter(connections_core, .data$link_as_spouse == TRUE) |> + dplyr::select( + -"extra", + -"link_as_mom", + -"link_as_dad", + -"link_as_spouse", + -"link_as_sibling" + ) + connections_for_sibs <- dplyr::filter(connections_core, .data$link_as_sibling == TRUE) |> + dplyr::select( + -"extra", + -"link_as_mom", + -"link_as_dad", + -"link_as_spouse", + -"link_as_sibling" + ) + } else { # no duplicates + connections_core <- dplyr::select( + .data = ped, + "personID", + "x_pos", "y_pos", + "dadID", "momID", "parenthash", + "spouseID", + "famID", + "extra" + ) |> unique() + + # no duplications, so just use the same connections + connections_for_sibs <- connections_for_spouses <- connections_for_dads <- connections_for_moms <- connections_core + +connections_core <- connections_core |> + dplyr::mutate( + link_as_mom = TRUE, + link_as_dad = TRUE, + link_as_spouse = TRUE, + link_as_sibling = TRUE + ) + + + + } + + # Get mom's coordinates + mom_connections <- getRelativeCoordinates( + ped = ped, + connections = connections_for_moms, + relativeIDvar = "momID", + x_name = "x_mom", + y_name = "y_mom" + ) + + # Get dad's coordinates + dad_connections <- getRelativeCoordinates( + ped = ped, + connections = connections_for_dads, + relativeIDvar = "dadID", + x_name = "x_dad", + y_name = "y_dad" + ) + +# combine for parents + parent_connections <- connections_core |> + dplyr::left_join(mom_connections, + by = c("personID", "momID") + ) |> + dplyr::left_join(dad_connections, + by = c("personID", "dadID") + ) + + # Calculate midpoints between mom and dad in child row + + parent_midpoints <- parent_connections |> + dplyr::filter(!is.na(.data$dadID) & !is.na(.data$momID)) |> + dplyr::group_by(.data$parenthash) |> + dplyr::summarize( + x_midparent = mean(c( + dplyr::first(.data$x_dad), # faster + dplyr::first(.data$x_mom) + )), + y_midparent = mean(c( + dplyr::first(.data$y_dad), + dplyr::first(.data$y_mom) + )), + .groups = "drop" + ) |> unique() + + +# spouses + # Get spouse coordinates + spouse_connections <- build_connections_spouse_segment( + ped, + connections_for_spouses + ) +print(spouse_connections) + # Calculate midpoints between spouses + + +spouse_midpoints <- connections_core |> + dplyr::left_join(spouse_connections, + by = c("spouseID") + ) |> + dplyr::filter(!is.na(.data$spouseID)) |> + dplyr::group_by(.data$spouseID) |> + dplyr::summarize( + x_mid_spouse = mean(c( + dplyr::first(.data$x_pos), + dplyr::first(.data$x_spouse) + )), + y_mid_spouse = mean(c( + dplyr::first(.data$y_pos), + dplyr::first(.data$y_spouse) + )), + .groups = "drop" + ) |> unique() + + + # Combine mom, dad, and spouse coordinates + connections <- connections_core |> + dplyr::left_join(mom_connections, + by = c("personID", "momID") + ) |> + dplyr::left_join(dad_connections, + by = c("personID", "dadID") + ) |> + dplyr::left_join(spouse_connections, + by = c("personID", "spouseID") + ) |> unique() + + + + # Calculate sibling group midpoints + sibling_midpoints <- connections |> + dplyr::filter( + !is.na(.data$momID) & !is.na(.data$dadID) & # biological parents defined + !is.na(.data$x_mom) & !is.na(.data$y_mom) & # mom’s coordinates linked + !is.na(.data$x_dad) & !is.na(.data$y_dad) # dad’s coordinates linked + ) |> + dplyr::group_by( + .data$parenthash, + .data$x_mom, .data$y_mom, + .data$x_dad, .data$y_dad + ) |> + dplyr::summarize( + x_mid_sib = mean(.data$x_pos), + y_mid_sib = dplyr::first(.data$y_pos), + .groups = "drop" + ) |> unique() + + + # Merge midpoints into connections + connections <- connections |> + dplyr::left_join(parent_midpoints, + by = c("parenthash") + ) |> + dplyr::left_join(spouse_midpoints, + by = c("spouseID") + ) |> + dplyr::left_join(sibling_midpoints, + by = c("parenthash","x_mom", "y_mom", + "x_dad", "y_dad") + ) |> + dplyr::mutate( + x_mid_sib = dplyr::case_when( + is.na(.data$x_dad) & is.na(.data$x_mom) ~ NA_real_, + !is.na(.data$x_mid_sib) ~ .data$x_mid_sib, + (!is.na(.data$momID) & !is.na(.data$x_mom)) | (!is.na(.data$dadID) & !is.na(.data$x_dad)) ~ .data$x_pos, + TRUE ~ NA_real_ + ), + y_mid_sib = dplyr::case_when( + is.na(.data$y_dad) & is.na(.data$y_mom) ~ NA_real_, + + !is.na(.data$y_mid_sib) ~ .data$y_mid_sib, + (!is.na(.data$momID) & !is.na(.data$y_mom)) | (!is.na(.data$dadID) & !is.na(.data$y_dad)) ~ .data$y_pos, + TRUE ~ NA_real_ + ) + ) |> unique() + + plotting_connections <- list( + connections = connections, + connections_core = connections_core, + connections_spouse_segment = connections_spouse_segment, # Spouse link between two parents, needs + # x = .data$x_spouse, + # xend = .data$x_pos, + # y = .data$y_spouse, + # yend = .data$y_pos + connections_parent_segment = NULL, # Parent-child stub (child to mid-sibling point) + connections_offspring_segment = NULL, # Mid-sibling to parents midpoint + connections_sibling_segment = NULL, # Sibling vertical drop line + ped = ped + ) + + + return(connections) +} + + +build_connections_spouse_segment <- function(ped,connections_for_spouses) { + + # spouses + # Get spouse coordinates + spouse_connections <- ped |> + dplyr::select( + "personID", "x_pos", + "y_pos", "spouseID" + ) |> dplyr::filter(!is.na(.data$spouseID)) |> + dplyr::left_join(connections_for_spouses, + by = c("spouseID" = "personID"), + suffix = c("", "_spouse"), + multiple = "any" + ) |> + dplyr::rename( + x_spouse = "x_pos_spouse", + y_spouse = "y_pos_spouse" + ) |> unique() |> + dplyr::mutate( + x_start = .data$x_spouse, + x_end = .data$x_pos, + y_start = .data$y_spouse, + y_end = .data$y_pos + ) |> select( + -"spouseID_spouse" + ) + + return(spouse_connections) +} + + diff --git a/R/calcConnectionsHelpers.R b/R/calcConnectionsHelpers.R new file mode 100644 index 00000000..a651e7de --- /dev/null +++ b/R/calcConnectionsHelpers.R @@ -0,0 +1,228 @@ +#' Compute distance between two points +#' +#' This function calculates the distance between two points in a 2D space using +#' Minkowski distance. It can be used to compute Euclidean or Manhattan distance. +#' It is a utility function for calculating distances in pedigree layouts. +#' Defaults to Euclidean distance if no method is specified. +#' +#' +#' @param x1 Numeric. X-coordinate of the first point. +#' @param y1 Numeric. Y-coordinate of the first point. +#' @param x2 Numeric. X-coordinate of the second point. +#' @param y2 Numeric. Y-coordinate of the second point. +#' @param method Character. Method of distance calculation. Options are "euclidean", "cityblock", and "Minkowski". +#' @param p Numeric. The order of the Minkowski distance. If NULL, defaults to 2 for Euclidean and 1 for Manhattan. If +#' Minkowski method is used, p should be specified. + +computeDistance <- function(x1, y1, x2, y2, + method = "euclidean", p = NULL) { + method <- tolower(method) + + if (is.null(p)) { + p <- switch(method, + euclidean = 2, + cityblock = 1, + stop("Invalid distance method. Choose from 'euclidean', 'cityblock', or specify p.") + ) + } + # Calculate Minkowski distance + + ((abs(x1 - x2))^p + (abs(y1 - y2))^p)^(1 / p) +} + +#' Compute midpoints across grouped coordinates +#' +#' A flexible utility function to compute x and y midpoints for groups of individuals +#' using a specified method. Used to support positioning logic for sibling groups, +#' parental dyads, or spousal pairs in pedigree layouts. +#' @param data A `data.frame` containing the coordinate and grouping variables. +#' @param group_vars Character vector. Names of the grouping variables. +#' @param x_vars Character vector. Names of the x-coordinate variables to be averaged. +#' @param y_vars Character vector. Names of the y-coordinate variables to be averaged. +#' @param x_out Character. Name of the output column for the x-coordinate midpoint. +#' @param y_out Character. Name of the output column for the y-coordinate midpoint. +#' @param method Character. Method for calculating midpoints. Options include: +#' \itemize{ +#' \item `"mean"`: Arithmetic mean of the coordinates. +#' \item `"median"`: Median of the coordinates. +#' \item `"weighted_mean"`: Weighted mean of the coordinates. +#' \item `"first_pair"`: Mean of the first pair of coordinates. +#' \item `"meanxfirst"`: Mean of the x-coordinates and first y-coordinate. +#' \item `"meanyfirst"`: Mean of the y-coordinates and first x-coordinate. +#' } +#' @param require_non_missing Character vector. Names of variables that must not be missing for the row to be included. + +#' @return A `data.frame` grouped by `group_vars` with new columns `x_out` and `y_out` containing midpoint coordinates. +#' @keywords internal + +getMidpoints <- function(data, group_vars, + x_vars, y_vars, + x_out, y_out, method = "mean", + require_non_missing = group_vars) { + # ----- + # Filter for complete data if requested + if (!is.null(require_non_missing)) { + data <- data |> + dplyr::filter( + dplyr::if_all(!!!rlang::syms(require_non_missing), ~ !is.na(.)) + ) + } + + # ----- + # Apply selected midpoint method + # ----- + + if (method == "mean") { + # Average all xs and Average of all y values + + data |> + dplyr::group_by(!!!rlang::syms(group_vars)) |> + dplyr::summarize( + !!x_out := mean(c(!!!rlang::syms(x_vars)), na.rm = TRUE), + !!y_out := mean(c(!!!rlang::syms(y_vars)), na.rm = TRUE), + .groups = "drop" + ) + } else if (method == "median") { + # Median of all xs and Median of all y values + data |> + dplyr::group_by(!!!rlang::syms(group_vars)) |> + dplyr::summarize( + !!x_out := stats::median(c(!!!rlang::syms(x_vars)), na.rm = TRUE), + !!y_out := stats::median(c(!!!rlang::syms(y_vars)), na.rm = TRUE), + .groups = "drop" + ) + } else if (method == "weighted_mean") { + # Weighted average (same weight for all unless specified externally) + + data |> + dplyr::group_by(!!!rlang::syms(group_vars)) |> + dplyr::summarize( + !!x_out := stats::weighted.mean(c(!!!rlang::syms(x_vars)), na.rm = TRUE), + !!y_out := stats::weighted.mean(c(!!!rlang::syms(y_vars)), na.rm = TRUE), + .groups = "drop" + ) + } else if (method == "first_pair") { + # Use only the first value in each pair of x/y coordinates + # This is useful for spousal pairs or sibling groups + data |> + dplyr::group_by(!!!rlang::syms(group_vars)) |> + dplyr::summarize( + !!x_out := mean(c( + dplyr::first(.data[[x_vars[1]]]), + dplyr::first(.data[[x_vars[2]]]) + ), na.rm = TRUE), + !!y_out := mean(c( + dplyr::first(.data[[y_vars[1]]]), + dplyr::first(.data[[y_vars[2]]]) + ), na.rm = TRUE), + .groups = "drop" + ) + } else if (method == "meanxfirst") { + # Use the mean of all x coordinates and the first y coordinate + data |> + dplyr::group_by(!!!rlang::syms(group_vars)) |> + dplyr::summarize( + !!x_out := mean(c(!!!rlang::syms(x_vars)), na.rm = TRUE), + !!y_out := mean(c( + dplyr::first(.data[[y_vars[1]]]), + dplyr::first(.data[[y_vars[2]]]) + ), na.rm = TRUE), + .groups = "drop" + ) + } else if (method == "meanyfirst") { + # First x, mean of all y + data |> + dplyr::group_by(!!!rlang::syms(group_vars)) |> + dplyr::summarize( + !!x_out := mean(c( + dplyr::first(.data[[x_vars[1]]]), + dplyr::first(.data[[x_vars[2]]]) + ), na.rm = TRUE), + !!y_out := mean(c(!!!rlang::syms(y_vars)), na.rm = TRUE), + .groups = "drop" + ) + } else { + # Handle unsupported method argument + stop("Unsupported method.") + } +} + + +#' Get coordinate positions of relatives for each individual +#' +#' Helper function used to retrieve the x and y coordinates of a specified relative +#' (e.g., mom, dad, spouse) and join them into the main connection table. This supports +#' relative-specific positioning in downstream layout functions like `calculateConnections()`. +#' +#' @inheritParams ggpedigree +#' @param connections A `data.frame` containing the individuals and their associated relative IDs. +#' @param relativeIDvar Character. Name of the column in `connections` for the relative ID variable. +#' @param x_name Character. Name of the new column to store the x-coordinate of the relative. +#' @param y_name Character. Name of the new column to store the y-coordinate of the relative. +#' @param multiple Character. Specifies how to handle multiple matches. Options are "all" or "any". +#' @param only_unique Logical. If TRUE, return only unique rows. Defaults to TRUE. +#' +#' @return A `data.frame` with columns: +#' \itemize{ +#' \item `personID`, `relativeIDvar` +#' \item `x_name`, `y_name`: Coordinates of the specified relative +#' \item Optionally, `newID` if present in `ped` +#' } +#' @keywords internal + + +getRelativeCoordinates <- function(ped, connections, relativeIDvar, x_name, y_name, + # relationship = "one-to-one", + personID = "personID", + multiple = "all", + only_unique = TRUE) { + # Filter only rows where the relative ID is not missing + # and join with the main pedigree data frame + rel_connections <- connections |> + dplyr::filter(!is.na(.data[[relativeIDvar]])) |> + # Join in the relative's coordinates from `ped`, based on relative ID + dplyr::left_join( + ped, + by = stats::setNames(personID, relativeIDvar), + suffix = c("", "_rel"), + # relationship = relationship, + multiple = multiple + ) |> + # Rename the joined coordinate columns to the specified x/y output names + dplyr::rename( + !!x_name := "x_pos_rel", + !!y_name := "y_pos_rel" + ) + # If the ped includes a 'newID' column (used to track duplicates), retain it in the result + if ("newID" %in% names(ped)) { + rel_connections <- rel_connections |> + dplyr::select( + !!personID, + "newID", + !!relativeIDvar, + !!x_name, + !!y_name + ) + } else { + rel_connections <- rel_connections |> + dplyr::select( + !!personID, + !!relativeIDvar, + !!x_name, + !!y_name + ) + } + if (only_unique == TRUE) { + rel_connections <- unique(rel_connections) + } + + return(rel_connections) +} + + +symKey <- function(id1, id2, sep = ".") { + dplyr::if_else(id1 < id2, + paste0(id1, sep, id2), + paste0(id2, sep, id1) + ) +} diff --git a/R/calcCoordinates.R b/R/calcCoordinates.R index ff349bd4..ad06104b 100644 --- a/R/calcCoordinates.R +++ b/R/calcCoordinates.R @@ -25,7 +25,9 @@ utils::globalVariables(c(":=")) #' \item `nid`: Internal numeric identifier for layout mapping. #' \item `extra`: Logical flag indicating whether this row is a secondary appearance. #' } +#' #' @export + calculateCoordinates <- function(ped, personID = "ID", momID = "momID", dadID = "dadID", spouseID = "spouseID", sexVar = "sex", @@ -64,6 +66,30 @@ calculateCoordinates <- function(ped, personID = "ID", momID = "momID", momid = ped[[momID]], sex = ped_recode[[sexVar]], ) +# + if ("hints" %in% names(config)) { + # Check if hints are provided + autohint <- tryCatch( + kinship2::autohint(ped_ped, config$hints, + align = config$ped_align, + packed = config$ped_packed + ), + error = function(e) { + warning("Your hints caused an error and were not used. Using default hints instead.") + kinship2::autohint( + ped_ped, + align = config$ped_align, + packed = config$ped_packed + )} + ) + } else { + autohint <- kinship2::autohint(ped_ped, + align = config$ped_align, + packed = config$ped_packed + ) + } + + # ----- # Extract layout information @@ -74,7 +100,8 @@ calculateCoordinates <- function(ped, personID = "ID", momID = "momID", pos <- kinship2::align.pedigree(ped_ped, packed = config$ped_packed, align = config$ped_align, - width = config$ped_width + width = config$ped_width, + hints = autohint ) # Extract layout information @@ -83,7 +110,9 @@ calculateCoordinates <- function(ped, personID = "ID", momID = "momID", # Flatten coordinate matrix - pos_vector <- as.vector(pos$pos) + # pos_vector <- as.vector(pos$pos) + # spouse_vector <- as.vector(pos$spouse) + # Initialize coordinate columns in the data frame ped$nid <- NA @@ -100,11 +129,21 @@ calculateCoordinates <- function(ped, personID = "ID", momID = "momID", y_coords <- rep(NA, length(nid_vector)) x_pos <- rep(NA, length(nid_vector)) + # Initialize spouse vector + spouse_vector <- rep(NA, length(nid_vector)) + + # A matrix with values + # 1 = subject plotted to the immediate right is a spouse + # 2 = subject plotted to the immediate right is an inbred spouse + # 0 = not a spouse + + # Populate coordinates from nid positions for (i in seq_along(nid_vector)) { y_coords[i] <- nid_pos[i, "row"] x_coords[i] <- nid_pos[i, "col"] x_pos[i] <- pos$pos[nid_pos[i, "row"], nid_pos[i, "col"]] + spouse_vector[i] <- pos$spouse[nid_pos[i, "row"], nid_pos[i, "col"]] } # ----- @@ -121,6 +160,8 @@ calculateCoordinates <- function(ped, personID = "ID", momID = "momID", ped$y_order <- y_coords[tmp] ped$x_pos <- x_pos[tmp] ped$y_pos <- y_coords[tmp] + ped$spousehint <- spouse_vector[tmp] + # Detect multiple layout positions for the same individual # This can happen if the same individual appears multiple times in the pedigree @@ -169,672 +210,8 @@ calculateCoordinates <- function(ped, personID = "ID", momID = "momID", ped <- rbind(ped, ped_extra) } else { ped_extra <- NULL + ped$extra <- FALSE } return(ped) } - -#' Calculate connections for a pedigree dataset -#' -#' Computes graphical connection paths for a pedigree layout, including parent-child, -#' sibling, and spousal connections. Optionally processes duplicate appearances -#' of individuals (marked as `extra`) to ensure relational accuracy. -#' -#' @inheritParams ggpedigree -#' @param config List of configuration parameters. Currently unused but passed through to internal helpers. -#' @return A `data.frame` containing connection points and midpoints for graphical rendering. Includes: -#' \itemize{ -#' \item `x_pos`, `y_pos`: positions of focal individual -#' \item `x_dad`, `y_dad`, `x_mom`, `y_mom`: parental positions (if available) -#' \item `x_spouse`, `y_spouse`: spousal positions (if available) -#' \item `x_midparent`, `y_midparent`: midpoint between parents -#' \item `x_mid_sib`, `y_mid_sib`: sibling group midpoint -#' \item `x_mid_spouse`, `y_mid_spouse`: midpoint between spouses -#' } -#' -#' @export - -calculateConnections <- function(ped, - config = list()) { - # Check inputs ----------------------------------------------------------- - if (!inherits(ped, "data.frame")) { - stop("ped should be a data.frame or inherit to a data.frame") - } - if (!all(c("personID", "x_pos", "y_pos", "dadID", "momID") %in% names(ped))) { - stop("ped must contain personID, x_pos, y_pos, dadID, and momID columns") - } - - # Default configuration placeholder - default_config <- list() - config <- utils::modifyList(default_config, config) - - - # Add spouseID if missing - if (!all("spouseID" %in% names(ped))) { - ped$spouseID <- NA - # Attempt to infer spouse based on parenthood (not always reliable) - # this will give you the mom that is the spouse of the dad - # ped$spouseID <- ped$momID[match(ped$personID, ped$dadID)] - # this will give you the dad that is the spouse of the mom - # ped$spouseID <- ped$dadID[match(ped$personID, ped$momID)] - - ped$spouseID <- ifelse(!is.na(ped$momID[match(ped$personID, ped$dadID)]), - ped$momID[match(ped$personID, ped$dadID)], - ped$dadID[match(ped$personID, ped$momID)] - ) - } - # Add famID if missing (used for grouping) - if (!all("famID" %in% names(ped))) { - ped$famID <- 1 - } - - # If duplicated appearances exist, resolve which connections to keep - if ("extra" %in% names(ped)) { - ped <- processExtras(ped, config = config) - } - - # Construct base connection frame - # This will be used for all joins - - if ("x_otherself" %in% names(ped)) { - connections <- dplyr::select( - .data = ped, - "personID", - "x_pos", "y_pos", - "dadID", "momID", - "spouseID", - "famID", - "x_otherself", "y_otherself" - ) - } else { - connections <- dplyr::select( - .data = ped, - "personID", - "x_pos", "y_pos", - "dadID", "momID", - "spouseID", - "famID" - ) - } - - # Get mom's coordinates - mom_connections <- getRelativeCoordinates( - ped = ped, - connections = connections, - relativeIDvar = "momID", - x_name = "x_mom", - y_name = "y_mom" - ) - - # Get dad's coordinates - dad_connections <- getRelativeCoordinates( - ped = ped, - connections = connections, - relativeIDvar = "dadID", - x_name = "x_dad", - y_name = "y_dad" - ) - - # Get spouse coordinates - spouse_connections <- ped |> - dplyr::select( - "personID", "x_pos", - "y_pos", "spouseID" - ) |> - dplyr::left_join(ped, - by = c("spouseID" = "personID"), - suffix = c("", "_spouse"), - multiple = "any" - ) |> - dplyr::rename( - x_spouse = "x_pos_spouse", - y_spouse = "y_pos_spouse" - ) |> - dplyr::select( - "personID", "spouseID", - "x_spouse", "y_spouse" - ) - - # Combine mom, dad, and spouse coordinates - connections <- connections |> - dplyr::left_join(mom_connections, - by = c("personID", "momID") - ) |> - dplyr::left_join(dad_connections, - by = c("personID", "dadID") - ) |> - dplyr::left_join(spouse_connections, - by = c("personID", "spouseID") - ) - - # Calculate midpoints between mom and dad - parent_midpoints <- connections |> - dplyr::filter(!is.na(.data$dadID) & !is.na(.data$momID)) |> - dplyr::group_by(.data$dadID, .data$momID) |> - dplyr::summarize( - x_midparent = mean(c( - dplyr::first(.data$x_dad), - dplyr::first(.data$x_mom) - )), - y_midparent = mean(c( - dplyr::first(.data$y_dad), - dplyr::first(.data$y_mom) - )), - .groups = "drop" - ) - - # Calculate midpoints between spouses - spouse_midpoints <- connections |> - dplyr::filter(!is.na(.data$spouseID)) |> - dplyr::group_by(.data$spouseID) |> - dplyr::summarize( - x_mid_spouse = mean(c( - dplyr::first(.data$x_pos), - dplyr::first(.data$x_spouse) - )), - y_mid_spouse = mean(c( - dplyr::first(.data$y_pos), - dplyr::first(.data$y_spouse) - )), - .groups = "drop" - ) - - # Calculate sibling group midpoints - sibling_midpoints <- connections |> - dplyr::filter(!is.na(.data$dadID) & !is.na(.data$momID)) |> - dplyr::group_by( - .data$dadID, - .data$momID - ) |> - dplyr::summarize( - x_mid_sib = mean(.data$x_pos), - y_mid_sib = dplyr::first(.data$y_pos), - .groups = "drop" - ) - - - # Merge midpoints into connections - connections <- connections |> - dplyr::left_join(parent_midpoints, - by = c("dadID", "momID") - ) |> - dplyr::left_join(spouse_midpoints, - by = c("spouseID") - ) |> - dplyr::left_join(sibling_midpoints, - by = c("dadID", "momID") - ) |> - dplyr::mutate( - x_mid_sib = dplyr::case_when( - is.na(.data$x_mid_sib) & !is.na(.data$dadID) & !is.na(.data$momID) ~ .data$x_pos, - !is.na(.data$x_mid_sib) ~ .data$x_mid_sib, - TRUE ~ NA_real_ - ), - y_mid_sib = dplyr::case_when( - is.na(.data$y_mid_sib) & !is.na(.data$dadID) & !is.na(.data$momID) ~ .data$y_pos, - !is.na(.data$y_mid_sib) ~ .data$y_mid_sib, - TRUE ~ NA_real_ - ) - ) - - return(connections) -} -#' Get coordinate positions of relatives for each individual -#' -#' Helper function used to retrieve the x and y coordinates of a specified relative -#' (e.g., mom, dad, spouse) and join them into the main connection table. This supports -#' relative-specific positioning in downstream layout functions like `calculateConnections()`. -#' -#' @inheritParams ggpedigree -#' @param connections A `data.frame` containing the individuals and their associated relative IDs. -#' @param relativeIDvar Character. Name of the column in `connections` for the relative ID variable. -#' @param x_name Character. Name of the new column to store the x-coordinate of the relative. -#' @param y_name Character. Name of the new column to store the y-coordinate of the relative. -#' @param multiple Character. Specifies how to handle multiple matches. Options are "all" or "any". -#' -#' @return A `data.frame` with columns: -#' \itemize{ -#' \item `personID`, `relativeIDvar` -#' \item `x_name`, `y_name`: Coordinates of the specified relative -#' \item Optionally, `newID` if present in `ped` -#' } -#' @keywords internal - - -getRelativeCoordinates <- function(ped, connections, relativeIDvar, x_name, y_name, - # relationship = "one-to-one", - personID = "personID", - multiple = "all") { - # Filter only rows where the relative ID is not missing - # and join with the main pedigree data frame - rel_connections <- connections |> - dplyr::filter(!is.na(.data[[relativeIDvar]])) |> - # Join in the relative's coordinates from `ped`, based on relative ID - dplyr::left_join( - ped, - by = stats::setNames(personID, relativeIDvar), - suffix = c("", "_rel"), - # relationship = relationship, - multiple = multiple - ) |> - # Rename the joined coordinate columns to the specified x/y output names - dplyr::rename( - !!x_name := "x_pos_rel", - !!y_name := "y_pos_rel" - ) - # If the ped includes a 'newID' column (used to track duplicates), retain it in the result - if ("newID" %in% names(ped)) { - rel_connections <- rel_connections |> - dplyr::select( - !!personID, - "newID", - !!relativeIDvar, - !!x_name, - !!y_name - ) - } else { - rel_connections <- rel_connections |> - dplyr::select( - !!personID, - !!relativeIDvar, - !!x_name, - !!y_name - ) - } - - return(rel_connections) -} - -#' Compute midpoints across grouped coordinates -#' -#' A flexible utility function to compute x and y midpoints for groups of individuals -#' using a specified method. Used to support positioning logic for sibling groups, -#' parental dyads, or spousal pairs in pedigree layouts. -#' @param data A `data.frame` containing the coordinate and grouping variables. -#' @param group_vars Character vector. Names of the grouping variables. -#' @param x_vars Character vector. Names of the x-coordinate variables to be averaged. -#' @param y_vars Character vector. Names of the y-coordinate variables to be averaged. -#' @param x_out Character. Name of the output column for the x-coordinate midpoint. -#' @param y_out Character. Name of the output column for the y-coordinate midpoint. -#' @param method Character. Method for calculating midpoints. Options include: -#' \itemize{ -#' \item `"mean"`: Arithmetic mean of the coordinates. -#' \item `"median"`: Median of the coordinates. -#' \item `"weighted_mean"`: Weighted mean of the coordinates. -#' \item `"first_pair"`: Mean of the first pair of coordinates. -#' \item `"meanxfirst"`: Mean of the x-coordinates and first y-coordinate. -#' \item `"meanyfirst"`: Mean of the y-coordinates and first x-coordinate. -#' } -#' @param require_non_missing Character vector. Names of variables that must not be missing for the row to be included. - -#' @return A `data.frame` grouped by `group_vars` with new columns `x_out` and `y_out` containing midpoint coordinates. -#' @keywords internal - -getMidpoints <- function(data, group_vars, - x_vars, y_vars, - x_out, y_out, method = "mean", - require_non_missing = group_vars) { - # ----- - # Filter for complete data if requested - if (!is.null(require_non_missing)) { - data <- data |> - dplyr::filter( - dplyr::if_all(!!!rlang::syms(require_non_missing), ~ !is.na(.)) - ) - } - - # ----- - # Apply selected midpoint method - # ----- - - if (method == "mean") { - # Average all xs and Average of all y values - - data |> - dplyr::group_by(!!!rlang::syms(group_vars)) |> - dplyr::summarize( - !!x_out := mean(c(!!!rlang::syms(x_vars)), na.rm = TRUE), - !!y_out := mean(c(!!!rlang::syms(y_vars)), na.rm = TRUE), - .groups = "drop" - ) - } else if (method == "median") { - # Median of all xs and Median of all y values - data |> - dplyr::group_by(!!!rlang::syms(group_vars)) |> - dplyr::summarize( - !!x_out := stats::median(c(!!!rlang::syms(x_vars)), na.rm = TRUE), - !!y_out := stats::median(c(!!!rlang::syms(y_vars)), na.rm = TRUE), - .groups = "drop" - ) - } else if (method == "weighted_mean") { - # Weighted average (same weight for all unless specified externally) - - data |> - dplyr::group_by(!!!rlang::syms(group_vars)) |> - dplyr::summarize( - !!x_out := stats::weighted.mean(c(!!!rlang::syms(x_vars)), na.rm = TRUE), - !!y_out := stats::weighted.mean(c(!!!rlang::syms(y_vars)), na.rm = TRUE), - .groups = "drop" - ) - } else if (method == "first_pair") { - # Use only the first value in each pair of x/y coordinates - # This is useful for spousal pairs or sibling groups - data |> - dplyr::group_by(!!!rlang::syms(group_vars)) |> - dplyr::summarize( - !!x_out := mean(c( - dplyr::first(.data[[x_vars[1]]]), - dplyr::first(.data[[x_vars[2]]]) - ), na.rm = TRUE), - !!y_out := mean(c( - dplyr::first(.data[[y_vars[1]]]), - dplyr::first(.data[[y_vars[2]]]) - ), na.rm = TRUE), - .groups = "drop" - ) - } else if (method == "meanxfirst") { - # Use the mean of all x coordinates and the first y coordinate - data |> - dplyr::group_by(!!!rlang::syms(group_vars)) |> - dplyr::summarize( - !!x_out := mean(c(!!!rlang::syms(x_vars)), na.rm = TRUE), - !!y_out := mean(c( - dplyr::first(.data[[y_vars[1]]]), - dplyr::first(.data[[y_vars[2]]]) - ), na.rm = TRUE), - .groups = "drop" - ) - } else if (method == "meanyfirst") { - # First x, mean of all y - data |> - dplyr::group_by(!!!rlang::syms(group_vars)) |> - dplyr::summarize( - !!x_out := mean(c( - dplyr::first(.data[[x_vars[1]]]), - dplyr::first(.data[[x_vars[2]]]) - ), na.rm = TRUE), - !!y_out := mean(c(!!!rlang::syms(y_vars)), na.rm = TRUE), - .groups = "drop" - ) - } else { - # Handle unsupported method argument - stop("Unsupported method.") - } -} - -#' Process duplicate appearances of individuals in a pedigree layout -#' -#' Resolves layout conflicts when the same individual appears in multiple places -#' (e.g., due to inbreeding loops). Keeps the layout point that is closest to a relevant -#' relative (mom, dad, or spouse) and removes links to others to avoid confusion in visualization. -#' -#' @param ped A data.frame containing pedigree layout info with columns including: -#' `personID`, `x_pos`, `y_pos`, `dadID`, `momID`, and a logical column `extra`. -#' @param config A list of configuration options. Currently unused but passed through to internal helpers. -#' -#' @return A modified `ped` data.frame with updated coordinates and removed duplicates. -#' -#' @keywords internal - -processExtras <- function(ped, config = list()) { - # ----- - # Check inputs - # ----- - if (!inherits(ped, "data.frame")) { - stop("ped should be a data.frame or inherit to a data.frame") - } - if (!all(c("personID", "x_pos", "y_pos", "dadID", "momID") %in% names(ped))) { - stop("ped must contain personID, x_pos, y_pos, dadID, and momID columns") - } - - # default config - default_config <- list() - config <- utils::modifyList(default_config, config) - - # ----- - # Identify duplicated individuals - # ----- - - # Find all individuals with extra appearances - idsextras <- dplyr::filter(ped, .data$extra == TRUE) |> - dplyr::select("personID") |> - dplyr::pull() |> - unique() - - - # Assign unique ID per row for later use - ped$newID <- 1:nrow(ped) - - - # ----- - # Subset to duplicated entries only # note that tidyselect hates .data pronouns - # ----- - extras <- dplyr::filter(ped, .data$personID %in% idsextras) |> - dplyr::select( - "newID", - "personID", - "x_pos", "y_pos", - "dadID", "momID", - "spouseID" - ) - - # ----- - # Get coordinate positions of relatives and other-self - # ----- - - # Mother's coordinates - mom_coords <- getRelativeCoordinates( - ped = ped, - connections = extras, - relativeIDvar = "momID", - x_name = "x_mom", - y_name = "y_mom", - multiple = "any" - ) - - # Father's coordinates - - dad_coords <- getRelativeCoordinates( - ped = ped, - connections = extras, - relativeIDvar = "dadID", - x_name = "x_dad", - y_name = "y_dad", - multiple = "any" - ) - - # Spouse's coordinates - spouse_coords <- getRelativeCoordinates( - ped = ped, - connections = extras, - relativeIDvar = "spouseID", - x_name = "x_spouse", - y_name = "y_spouse", - multiple = "all" - ) - - # Coordinates of the individual's other appearance ("self") - self_coords <- extras |> - dplyr::left_join( - ped, - by = c("personID"), - suffix = c("", "_other"), - # relationship = relationship, - multiple = "all" - ) |> - dplyr::filter(.data$newID != .data$newID_other) |> - dplyr::mutate( - x_otherself = .data$x_pos_other, - y_otherself = .data$y_pos_other - ) |> - dplyr::select( - .data$newID, - .data$personID, - .data$newID_other, - .data$x_otherself, - .data$y_otherself - ) - - # ----- - # Merge coordinates into the extra rows - # ----- - - extras <- extras |> - dplyr::left_join(mom_coords, by = c("newID", "personID", "momID")) |> - dplyr::left_join(dad_coords, by = c("newID", "personID", "dadID")) |> - dplyr::left_join(self_coords, by = c("newID", "personID")) |> - dplyr::left_join(spouse_coords, - by = c("newID", "personID", "spouseID"), - multiple = "all" - ) - - - # ----- - # Compute Euclidean distances between this appearance and: - # - mom, dad, spouse - # - same individual in other location (otherself) - # These will be used to choose the "closest" relationship. - # ----- - extras <- extras |> - dplyr::mutate( - dist_mom = sqrt((.data$x_pos - .data$x_mom)^2 + (.data$y_pos - .data$y_mom)^2), - dist_mom_other = sqrt((.data$x_otherself - .data$x_mom)^2 + (.data$y_otherself - .data$y_mom)^2), - dist_dad = sqrt((.data$x_pos - .data$x_dad)^2 + (.data$y_pos - .data$y_dad)^2), - dist_dad_other = sqrt((.data$x_otherself - .data$x_dad)^2 + (.data$y_otherself - .data$y_dad)^2), - dist_spouse = sqrt((.data$x_pos - .data$x_spouse)^2 + (.data$y_pos - .data$y_spouse)^2), - dist_spouse_other = sqrt((.data$x_otherself - .data$x_spouse)^2 + (.data$y_otherself - .data$y_spouse)^2), - dist_otherself = sqrt((.data$x_pos - .data$x_otherself)^2 + (.data$y_pos - .data$y_otherself)^2) - ) - - # ----- - # When there are multiple spouses, keep only the appearance - # where the individual is closest to one of their spouses. - # ----- - - extras <- extras |> - dplyr::group_by(.data$newID, .data$personID) |> - dplyr::mutate( - min_spouse = min(.data$dist_spouse, na.rm = TRUE), - num_spouse = dplyr::n() - ) |> - dplyr::ungroup() - extras <- extras |> - dplyr::filter(.data$num_spouse == 1 | .data$dist_spouse == .data$min_spouse) |> - dplyr::select( - -.data$min_spouse, - -.data$num_spouse - ) - - - # ----- - # Determine the "closest relative" to this duplicated row - # ----- - - - - # For each duplicated appearance, we now ask: - # - Is this appearance closer to mom than the otherself copy is? - # - Same for dad? For spouse? - - extras <- extras |> - dplyr::mutate( - mom_closer = dplyr::case_when( - .data$dist_mom < .data$dist_mom_other ~ TRUE, - .data$dist_mom_other < .data$dist_mom ~ FALSE, - TRUE ~ NA - ), - dad_closer = dplyr::case_when( - .data$dist_dad < .data$dist_dad_other ~ TRUE, - .data$dist_dad_other < .data$dist_dad ~ FALSE, - TRUE ~ NA - ), - spouse_closer = dplyr::case_when( - .data$dist_spouse < .data$dist_spouse_other ~ TRUE, - .data$dist_spouse_other < .data$dist_spouse ~ FALSE, - TRUE ~ NA - ) - ) - - - # Then: - # - Determine which of mom, dad, or spouse is closest in absolute terms - # - Use that to decide whether to retain connections to that relative - - extras <- extras |> - dplyr::mutate( - closest_relative = dplyr::case_when( - .data$dist_mom <= .data$dist_dad & .data$dist_mom <= .data$dist_spouse ~ "mom", - .data$dist_dad < .data$dist_mom & .data$dist_dad <= .data$dist_spouse ~ "dad", - TRUE ~ "spouse" - ) - ) - - # ----- - # Based on which relative is closest, determine which links to keep - # ----- - - extras <- extras |> - dplyr::mutate( - keep_parents = dplyr::case_when( - c("mom", "dad") %in% .data$closest_relative & .data$mom_closer == TRUE & .data$dad_closer == TRUE ~ TRUE, - c("mom", "dad") %in% .data$closest_relative & .data$mom_closer == TRUE & .data$dad_closer == FALSE ~ TRUE, - c("mom", "dad") %in% .data$closest_relative & .data$mom_closer == FALSE & .data$dad_closer == TRUE ~ TRUE, - TRUE ~ FALSE - ), - keep_spouse = dplyr::case_when( - c("spouse") %in% .data$closest_relative & .data$spouse_closer == TRUE ~ TRUE, - c("spouse") %in% .data$closest_relative & .data$spouse_closer == FALSE ~ FALSE, - TRUE ~ FALSE - ) - ) - - # ----- - # Final subset of relevant decision columns - # ----- - - skinnyextras <- extras |> - dplyr::select( - .data$newID, - .data$closest_relative, - .data$keep_parents, - .data$keep_spouse, - .data$x_otherself, - .data$y_otherself - ) - - - # ----- - # Apply decisions to main pedigree - # Removes connection references for non-kept parents/spouses - # ----- - - ped <- ped |> - dplyr::left_join(skinnyextras, - by = c("newID"), suffix = c("", "_"), - relationship = "one-to-one" - ) |> - dplyr::mutate( - spouseID = dplyr::case_when( - .data$keep_spouse == TRUE ~ .data$spouseID, - is.na(.data$closest_relative) ~ .data$spouseID, - TRUE ~ NA_real_ - ), - momID = dplyr::case_when( - .data$keep_parents == TRUE ~ .data$momID, - is.na(.data$closest_relative) ~ .data$momID, - TRUE ~ NA_real_ - ), - dadID = dplyr::case_when( - .data$keep_parents == TRUE ~ .data$dadID, - is.na(.data$closest_relative) ~ .data$dadID, - TRUE ~ NA_real_ - ) - ) |> - dplyr::select( - -"newID", - -"extra", - -"closest_relative" - ) - - return(ped) -} diff --git a/R/ggpedigree.R b/R/ggpedigree.R index 977975a8..4ab13b99 100644 --- a/R/ggpedigree.R +++ b/R/ggpedigree.R @@ -11,12 +11,15 @@ #' @param momID Character string specifying the column name for mother IDs. Defaults to "momID". #' @param dadID Character string specifying the column name for father IDs. Defaults to "dadID". #' @param status_col Character string specifying the column name for affected status. Defaults to NULL. +#' @param debug Logical. If TRUE, prints debugging information. Default: FALSE. +#' @param hints Data frame with hints for layout adjustments. Default: NULL. +#' @param ... Additional arguments passed to `ggplot2` functions. #' @param config A list of configuration options for customizing the plot. The list can include: #' \describe{ #' \item{code_male}{Integer or string. Value identifying males in the sex column. (typically 0 or 1) Default: 1.} #' \item{spouse_segment_color, self_segment_color, sibling_segment_color, parent_segment_color, offspring_segment_color}{Character. Line colors for respective connection types.} -#' \item{text_size, point_size, line_width}{Numeric. Controls text size, point size, and line thickness.} -#' \item{generation_gap}{Numeric. Vertical spacing multiplier between generations. Default: 1.} +#' \item{label_text_size, point_size, line_width}{Numeric. Controls text size, point size, and line thickness.} +#' \item{generation_height}{Numeric. Vertical spacing multiplier between generations. Default: 1.} #' \item{unknown_shape, female_shape, male_shape, affected_shape}{Integers. Shape codes for plotting each group.} #' \item{sex_shape_labs}{Character vector of labels for the sex variable. (default: c("Female", "Male", "Unknown")} #' \item{unaffected, affected}{Values indicating unaffected/affected status.} @@ -41,23 +44,32 @@ ggPedigree <- function(ped, famID = "famID", momID = "momID", dadID = "dadID", status_col = NULL, - config = list()) { + config = list(), + debug = FALSE, + hints = NULL, + ...) { # ----- # STEP 1: Configuration and Preparation # ----- # Set default styling and layout parameters default_config <- list( + apply_default_theme = TRUE, + apply_default_scales = TRUE, spouse_segment_color = "black", self_segment_color = "purple", sibling_segment_color = "black", parent_segment_color = "black", offspring_segment_color = "black", + include_labels = TRUE, + label_method = "ggrepel", + label_text_angle = 0, code_male = 1, - text_size = 3, + label_text_size = 2, point_size = 4, line_width = 0.5, - generation_gap = 1, + generation_height = 1, + generation_width = 1, unknown_shape = 18, female_shape = 16, male_shape = 15, @@ -67,16 +79,20 @@ ggPedigree <- function(ped, famID = "famID", affected = "affected", sex_color = TRUE, status_vals = c(1, 0), - max_overlaps = 100, - id_segment_color = NA + max_overlaps = 15, + id_segment_color = NA#, + # hints = NULL ) + + + # Merge with user-specified overrides # This allows the user to override any of the default values config <- utils::modifyList(default_config, config) # Set additional internal config values based on other entries - config$status_labs <- c(paste0(config$affected), paste0(config$unaffected)) + config$status_labs <- c(config$affected, config$unaffected) config$shape_vals <- c(config$female_shape, config$male_shape, config$unknown_shape) # ----- @@ -93,10 +109,10 @@ ggPedigree <- function(ped, famID = "famID", # Clean duplicated famID columns if present if ("famID.y" %in% names(ds_ped)) { - ds_ped <- dplyr::select(ds_ped, -.data$famID.y) + ds_ped <- dplyr::select(.data=ds_ped, -"famID.y") } if ("famID.x" %in% names(ds_ped)) { - ds_ped <- dplyr::rename(ds_ped, famID = .data$famID.x) + ds_ped <- dplyr::rename(.data=ds_ped, famID = "famID.x") } # If personID is not "personID", rename to "personID" internally @@ -111,6 +127,7 @@ ggPedigree <- function(ped, famID = "famID", ) } + # ----- # STEP 3: Sex Recode # ----- @@ -131,22 +148,26 @@ ggPedigree <- function(ped, famID = "famID", config = config ) - # Apply vertical spacing factor if generation_gap ≠ 1 - if (!isTRUE(all.equal(config$generation_gap, 1))) { - ds$y_pos <- ds$y_pos * config$generation_gap # expand/contract generations + # Apply vertical spacing factor if generation_height ≠ 1 + if (!isTRUE(all.equal(config$generation_height, 1))) { + ds$y_pos <- ds$y_pos * config$generation_height # expand/contract generations + } + # Apply horizontal spacing factor if generation_width ≠ 1 + if (!isTRUE(all.equal(config$generation_width, 1))) { + ds$x_pos <- ds$x_pos * config$generation_width # expand/contract generations } - # ----- # STEP 5: Compute Relationship Connections # ----- # Generate a connection table for plotting lines (parents, spouses, etc.) - connections <- calculateConnections(ds, config = config) + plot_connections <- calculateConnections(ds, config = config) + connections <- plot_connections$connections # ----- # STEP 6: Initialize Plot # ----- - gap_off <- 0.5 * config$generation_gap # single constant for all “stub” offsets + gap_off <- 0.5 * config$generation_height # single constant for all “stub” offsets p <- ggplot2::ggplot(ds, ggplot2::aes( x = .data$x_pos, @@ -158,21 +179,6 @@ ggPedigree <- function(ped, famID = "famID", # STEP 7: Add Segments # ----- - # Self-segment (for duplicate layout appearances of same person) - if ("x_otherself" %in% names(connections)) { - p <- p + ggplot2::geom_segment( - data = connections, - ggplot2::aes( - x = .data$x_otherself, - xend = .data$x_pos, - y = .data$y_otherself, - yend = .data$y_pos - ), - linewidth = config$line_width, - color = config$self_segment_color, - na.rm = TRUE - ) - } # Spouse link between two parents p <- p + ggplot2::geom_segment( @@ -186,20 +192,22 @@ ggPedigree <- function(ped, famID = "famID", linewidth = config$line_width, color = config$spouse_segment_color, na.rm = TRUE - ) + - # Parent-child stub (child to mid-sibling point) - ggplot2::geom_segment( - data = connections, - ggplot2::aes( - x = .data$x_mid_sib, - xend = .data$x_midparent, - y = .data$y_mid_sib - gap_off, - yend = .data$y_midparent - ), - linewidth = config$line_width, - color = config$parent_segment_color, - na.rm = TRUE - ) + + ) + + # Parent-child stub (child to mid-sibling point) + + p <- p + ggplot2::geom_segment( + data = connections, + ggplot2::aes( + x = .data$x_mid_sib, + xend = .data$x_midparent, + y = .data$y_mid_sib - gap_off, + yend = .data$y_midparent + ), + linewidth = config$line_width, + color = config$parent_segment_color, + na.rm = TRUE + ) + # Mid-sibling to parents midpoint ggplot2::geom_segment( data = connections, @@ -229,6 +237,7 @@ ggPedigree <- function(ped, famID = "famID", + # ----- # STEP 8: Add Points (nodes) # ----- @@ -293,44 +302,101 @@ ggPedigree <- function(ped, famID = "famID", # STEP 9: Add Labels # ----- # Add labels to the points using ggrepel for better visibility - p <- p + - ggrepel::geom_text_repel(ggplot2::aes(label = .data$personID), - nudge_y = -.15 * config$generation_gap, - size = config$text_size, - na.rm = TRUE, - max.overlaps = config$max_overlaps, - segment.size = config$line_width * .5, - segment.color = config$id_segment_color, + if (config$include_labels == TRUE && config$label_method == "ggrepel") { + p <- p + + ggrepel::geom_text_repel(ggplot2::aes(label = .data$personID), + nudge_y = -.10*config$generation_height, + size = config$label_text_size, + na.rm = TRUE, + max.overlaps = config$max_overlaps, + segment.size = config$line_width * .5, + angle = config$label_text_angle, + segment.color = config$id_segment_color + ) + } else if (config$include_labels == TRUE && config$label_method == "geom_label") { + p <- p + + ggplot2::geom_label(ggplot2::aes(label = .data$personID), + nudge_y = -.10 * config$generation_height, + size = config$label_text_size, + angle = config$label_text_angle, + na.rm = TRUE + + ) + } else if (config$include_labels == TRUE || config$label_method == "geom_text") { + p <- p + + ggplot2::geom_text(ggplot2::aes(label = .data$personID), + nudge_y = -.10*config$generation_height, + size = config$label_text_size, + angle = config$label_text_angle, + na.rm = TRUE + ) + } + + + # Self-segment (for duplicate layout appearances of same person) + if (inherits(plot_connections$self_coords, "data.frame")) { + otherself <- plot_connections$self_coords |> + dplyr::filter(!is.na(.data$x_otherself)) |> + dplyr::mutate( + otherself_xkey = symKey(.data$x_otherself, .data$x_pos) # , + # otherself_ykey = symKey(.data$y_otherself, .data$y_pos) + ) |> + # unique combinations of x_otherself and x_pos and y_otherself and y_pos + dplyr::distinct(.data$otherself_xkey, .keep_all = TRUE) + + + p <- p + ggplot2::geom_curve( + data = otherself, + ggplot2::aes( + x = .data$x_otherself, + xend = .data$x_pos, + y = .data$y_otherself, + yend = .data$y_pos + ), + linewidth = config$line_width, + color = config$self_segment_color, + angle = 90, + curvature = -0.2, + na.rm = TRUE ) + } + + # ----- # STEP 10: Scales, Theme # ----- p <- p + - ggplot2::scale_y_reverse() + - ggplot2::theme_minimal() + - ggplot2::theme( - axis.title.y = ggplot2::element_blank(), - axis.text.y = ggplot2::element_blank(), - axis.ticks.y = ggplot2::element_blank(), - panel.grid.major = ggplot2::element_blank(), - panel.grid.minor = ggplot2::element_blank(), - panel.background = ggplot2::element_blank(), - axis.title.x = ggplot2::element_blank(), - axis.text.x = ggplot2::element_blank(), - axis.ticks.x = ggplot2::element_blank() - ) + ggplot2::scale_y_reverse() + + if(config$apply_default_theme == TRUE) { + p <- p + + ggplot2::theme_minimal() + + ggplot2::theme( + axis.title.y = ggplot2::element_blank(), + axis.text.y = ggplot2::element_blank(), + axis.ticks.y = ggplot2::element_blank(), + panel.grid.major = ggplot2::element_blank(), + panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_blank(), + axis.title.x = ggplot2::element_blank(), + axis.text.x = ggplot2::element_blank(), + axis.ticks.x = ggplot2::element_blank() + ) +} + # ----- # STEP 11: Final Legend Adjustments # ----- # Adjust legend labels and colors based on the configuration - p <- p + ggplot2::scale_shape_manual( - values = config$shape_vals, - labels = config$shape_labs - ) + if(config$apply_default_scales == TRUE) { + p <- p + ggplot2::scale_shape_manual( + values = config$shape_vals, + labels = config$shape_labs + ) # Add alpha scale for affected status if applicable if (!is.null(status_col) && config$sex_color == TRUE) { @@ -354,10 +420,21 @@ ggPedigree <- function(ped, famID = "famID", } else { p <- p + ggplot2::labs(shape = "Sex") } + } - return(p) -} + if (debug == TRUE) { + return(list( + plot = p, + data = ds, + connections = connections, + config = config + )) + } else { + # If debug is FALSE, return only the plot + return(p) + } +} #' @rdname ggPedigree #' @export diff --git a/R/processExtras.R b/R/processExtras.R new file mode 100644 index 00000000..3d485c26 --- /dev/null +++ b/R/processExtras.R @@ -0,0 +1,333 @@ +#' Process duplicate appearances of individuals in a pedigree layout +#' +#' Resolves layout conflicts when the same individual appears in multiple places +#' (e.g., due to inbreeding loops). Keeps the layout point that is closest to a relevant +#' relative (mom, dad, or spouse) and removes links to others to avoid confusion in visualization. +#' +#' @param ped A data.frame containing pedigree layout info with columns including: +#' `personID`, `x_pos`, `y_pos`, `dadID`, `momID`, and a logical column `extra`. +#' @param config A list of configuration options. Currently unused but passed through to internal helpers. +#' +#' @return A modified `ped` data.frame with updated coordinates and removed duplicates. +#' +#' @keywords internal + +processExtras <- function(ped, config = list()) { + # ---- sanity checks ------------------------------------------------------- + if (!inherits(ped, "data.frame")) { + stop("ped must be a data.frame") + } + + req_cols <- c( + "personID", "x_pos", "y_pos", + "momID", "dadID", "spouseID", "extra" + ) + miss <- setdiff(req_cols, names(ped)) + if (length(miss)) { + stop("ped is missing columns: ", paste(miss, collapse = ", ")) + } + + # ---- 1. ensure a unique row key ---- + + ped$newID <- seq_len(nrow(ped)) + + idsextras <- dplyr::filter(ped, .data$extra == TRUE) |> + dplyr::select("personID") |> + dplyr::pull() |> + unique() + + # check if momID == spouseID + if (any(ped$momID == ped$spouseID, na.rm = TRUE)) { + ped <- ped |> + dplyr::mutate( + momSpouse = dplyr::if_else(.data$spouseID == .data$momID, + TRUE, + FALSE + ) + ) + } else { + ped <- ped |> + dplyr::mutate( + momSpouse = FALSE + ) + } + if (any(ped$dadID == ped$spouseID, na.rm = TRUE)) { + ped <- ped |> + dplyr::mutate( + dadSpouse = dplyr::if_else(.data$spouseID == .data$dadID, + TRUE, + FALSE + ) + ) + } else { + ped <- ped |> + dplyr::mutate( + dadSpouse = FALSE + ) + } + + + # ---- 2. give every extra appearance a unique numeric personID ----------- + ped <- ped |> + dplyr::arrange(.data$personID, .data$newID) |> + dplyr::mutate( + coreID = .data$personID, + personID = dplyr::if_else( + .data$extra, + .data$personID + .data$newID / 1000, # numeric, unique + .data$personID + ), + total_blue = .data$dadSpouse | .data$momSpouse + ) |> + dplyr::select(-.data$dadSpouse, -.data$momSpouse) + + ped <- ped |> # flag anyone with extra appearances + dplyr::mutate(extra = dplyr::case_when( + .data$coreID %in% idsextras ~ TRUE, + .data$momID %in% idsextras ~ TRUE, + .data$dadID %in% idsextras ~ TRUE, + .data$spouseID %in% idsextras ~ TRUE, + TRUE ~ .data$extra + )) + + + # ---- 3. isolate duplicates for distance logic --------------------------- + extras <- dplyr::filter(ped, .data$extra) + + # ---- 3a. attach relative coordinates (same helpers you use) ------------- + # Mother's coordinates + mom_coords <- getRelativeCoordinates( + ped = ped, + connections = extras, + relativeIDvar = "momID", + x_name = "x_mom", + y_name = "y_mom", + multiple = "any" + ) + + # Father's coordinates + dad_coords <- getRelativeCoordinates( + ped = ped, + connections = extras, + relativeIDvar = "dadID", + x_name = "x_dad", + y_name = "y_dad", + multiple = "any" + ) + + # Spouse's coordinates + spouse_coords <- getRelativeCoordinates( + ped = ped, + connections = extras, + relativeIDvar = "spouseID", + x_name = "x_spouse", + y_name = "y_spouse", + multiple = "all" + ) + + parent_hash_coords <- extras |> + dplyr::left_join(mom_coords, by = c("newID", "personID", "momID")) |> + dplyr::left_join(dad_coords, by = c("newID", "personID", "dadID")) |> + dplyr::filter(!is.na(.data$parent_hash)) |> + dplyr::mutate( + x_parent_hash = mean(c(.data$x_dad, .data$x_mom), na.rm = TRUE), + y_parent_hash = mean(c(.data$y_dad, .data$y_mom), na.rm = TRUE) + ) |> + dplyr::select( + .data$newID, .data$personID, + .data$x_parent_hash, .data$y_parent_hash + ) + + extras <- extras |> + dplyr::left_join(mom_coords, by = c("newID", "personID", "momID")) |> + dplyr::left_join(dad_coords, by = c("newID", "personID", "dadID")) |> + dplyr::left_join(spouse_coords, by = c("newID", "personID", "spouseID")) |> + dplyr::left_join(parent_hash_coords, by = c("newID", "personID")) + + # ---- 3b. compute distance metrics -------------- + extras <- extras |> + dplyr::mutate( + dist_mom = computeDistance( + method = "cityblock", + x1 = .data$x_pos, y1 = .data$y_pos, + x2 = .data$x_mom, y2 = .data$y_mom + ), + dist_dad = computeDistance( + method = "cityblock", + x1 = .data$x_pos, y1 = .data$y_pos, + x2 = .data$x_dad, y2 = .data$y_dad + ), + dist_spouse = computeDistance( + method = "cityblock", + x1 = .data$x_pos, y1 = .data$y_pos, + x2 = .data$x_spouse, y2 = .data$y_spouse + ), + total_parent_dist_cityblock = computeDistance( + method = "cityblock", + x1 = .data$x_pos, y1 = .data$y_pos, + x2 = .data$x_parent_hash, y2 = .data$y_parent_hash + ), + total_parent_dist2 = .data$dist_mom + .data$dist_dad + ) + + # ---- 4. choose winning duplicate per relationship ----------------------- + + spouse_winner <- extras |> + dplyr::group_by(.data$coreID, .data$spouseID) |> + dplyr::slice_min(.data$dist_spouse, n = 1, with_ties = FALSE) |> + dplyr::ungroup() |> + dplyr::select("coreID", spouse_choice = .data$personID) + + + if (sum(ped$total_blue, na.rm = TRUE) == 0) { + parent_winner <- extras |> + dplyr::group_by("coreID") |> + dplyr::slice_min(.data$total_parent_dist_cityblock, n = 1, with_ties = FALSE) |> + dplyr::ungroup() |> + dplyr::select("coreID", parent_choice = .data$personID) + } else { + # if there are spouseID == momID or spouseID == dadID, then parent choice needs to be the 2nd closest + parent_winner <- extras |> + dplyr::group_by("coreID") |> + dplyr::arrange(.data$total_parent_dist2, .by_group = TRUE) |> + dplyr::mutate( + rank = dplyr::row_number(), # 1 = closest, 2 = second‑closest … + pick_rank = base::ifelse(any(.data$total_blue), 2L, 1L) # group‑level choice + ) |> + dplyr::filter(.data$rank == .data$pick_rank) |> + dplyr::ungroup() |> + dplyr::select("coreID", parent_choice = .data$personID) + } + # ---- 5. row‑wise relink using nearest appearance ------------------------- + + + + + # lookup table: every appearance of every coreID + dup_xy <- ped |> + dplyr::select("personID", "coreID", "x_pos", "y_pos", "total_blue") + + closest_dup <- function(target_core, x0, y0) { + cand <- dup_xy[dup_xy$coreID == target_core, ] + if (nrow(cand) == 0L) { + return(NA_real_) + } + # compute Manhattan (“city‑block”) distance for all candidates + d <- computeDistance( + method = "cityblock", + x1 = x0, y1 = y0, + x2 = cand$x_pos, y2 = cand$y_pos + ) + ord <- order(d) # ascending distance + pick <- if(any(cand$total_blue, na.rm = TRUE)){ + 2L }else{ 1L} # 2nd if blue present, else 1st + + if(length(ord) < pick) pick <- 1L + + cand$personID[ord[pick]] + + # cand$personID[ + # which.min( + # computeDistance(method = "cityblock", + # x1= x0, y1=y0, + # x2=cand$x_pos, y2=cand$y_pos) + # ) + # ] + } + + relink <- function(df, col) { + df |> + dplyr::rowwise() |> + dplyr::mutate( + "{col}" := { + tgt <- .data[[col]] + if (is.na(tgt)) { + NA_real_ + } else { + closest_dup(tgt, .data$x_pos, .data$y_pos) + } + } + ) |> + dplyr::ungroup() + } + + + + # remove parent ids from all but the closest coreID, + # if there's no choice to be made, then keep existing momID + + ped <- ped |> + dplyr::left_join(spouse_winner, by = "coreID") |> + dplyr::left_join(parent_winner, by = "coreID") |> + dplyr::mutate( + momID = dplyr::case_when( + .data$personID == .data$parent_choice ~ .data$momID, + !is.na(.data$parent_choice) ~ NA_real_, + TRUE ~ .data$momID + ), + dadID = dplyr::case_when( + .data$personID == .data$parent_choice ~ .data$dadID, + !is.na(.data$parent_choice) ~ NA_real_, + TRUE ~ .data$dadID + ), + spouseID = dplyr::case_when( + .data$personID == .data$spouse_choice ~ .data$spouseID, + !is.na(.data$spouse_choice) ~ NA_real_, + TRUE ~ .data$spouseID + ) + ) |> + dplyr::select( + -.data$parent_choice, -.data$spouse_choice, + -dplyr::starts_with("newID") + ) + ped <- ped |> + relink("spouseID") |> + relink("momID") |> + relink("dadID") + + # + + # rehash + ped <- ped |> + dplyr::mutate( + parent_hash = symKey(.data$momID, .data$dadID), + couple_hash = symKey(.data$personID, .data$spouseID) + ) |> + dplyr::mutate( + parent_hash = gsub("NA.NA", NA_real_, .data$parent_hash), + couple_hash = gsub("NA.NA", NA_real_, .data$couple_hash) + ) + # ---- 6. remove duplicates and return ------------------------------------ + + # Coordinates of the individual's other appearance ("self") + self_coords <- extras |> + dplyr::left_join( + ped, + by = c("coreID"), + suffix = c("", "_other"), + # relationship = relationship, + multiple = "all" + ) |> + dplyr::filter(.data$personID != .data$personID_other) |> + dplyr::mutate( + x_otherself = .data$x_pos_other, + y_otherself = .data$y_pos_other + ) |> + # dplyr::select( + # .data$personID, + # # .data$coreID, + # .data$x_pos, + # .data$y_pos, + # .data$x_otherself, + # .data$y_otherself, + # + # ) |> + unique() + + full_extra <- list( + ped = ped, + self_coords = self_coords + ) + + return(full_extra) +} diff --git a/R/processExtras.X b/R/processExtras.X new file mode 100644 index 00000000..3c486a36 --- /dev/null +++ b/R/processExtras.X @@ -0,0 +1,436 @@ + + +#' Process duplicate appearances of individuals in a pedigree layout +#' +#' Resolves layout conflicts when the same individual appears in multiple places +#' (e.g., due to inbreeding loops). Keeps the layout point that is closest to a relevant +#' relative (mom, dad, or spouse) and removes links to others to avoid confusion in visualization. +#' +#' @param ped A data.frame containing pedigree layout info with columns including: +#' `personID`, `x_pos`, `y_pos`, `dadID`, `momID`, and a logical column `extra`. +#' @param config A list of configuration options. Currently unused but passed through to internal helpers. +#' +#' @return A modified `ped` data.frame with updated coordinates and removed duplicates. +#' +#' @keywords internal + +processExtrasx <- function(ped, config = list()) { + # ----- + # Check inputs + # ----- + if (!inherits(ped, "data.frame")) { + stop("ped should be a data.frame or inherit to a data.frame") + } + + req_cols <- c("personID", "x_pos", "y_pos", "dadID", "momID") + if (!all(req_cols %in% names(ped))) { + stop("ped must contain personID, x_pos, y_pos, dadID, and momID columns") + } + + default_config <- list() + config <- utils::modifyList(default_config, config) + + # Assign unique ID per row for later use + ped$newID <- seq_len(nrow(ped)) + + # ----- + # Identify duplicated individuals + # ----- + + # Find all individuals with extra appearances + idsextras <- dplyr::filter(ped, .data$extra == TRUE) |> + dplyr::select("personID") |> + dplyr::pull() |> + unique() + + ped <- ped |> # flag anyone with extra appearances + dplyr::mutate(extra = dplyr::case_when(.data$personID %in% idsextras ~ TRUE, + .data$momID %in% idsextras ~ TRUE, + .data$dadID %in% idsextras ~ TRUE, + .data$spouseID %in% idsextras ~ TRUE, + TRUE ~ .data$extra)) + + + # ----- + # Subset to duplicated entries only # note that tidyselect hates .data pronouns + # ----- + + extras <- dplyr::filter(ped, .data$personID %in% idsextras) |> + dplyr::select( + "newID", + "personID", + "x_pos", "y_pos", + "dadID", "momID","parent_hash", "couple_hash", + "spouseID" + ) + + # ----- + # Get coordinate positions of relatives and other-self + # ----- + + # Mother's coordinates + mom_coords <- getRelativeCoordinates( + ped = ped, + connections = extras, + relativeIDvar = "momID", + x_name = "x_mom", + y_name = "y_mom", + multiple = "any" + ) + + # Father's coordinates + dad_coords <- getRelativeCoordinates( + ped = ped, + connections = extras, + relativeIDvar = "dadID", + x_name = "x_dad", + y_name = "y_dad", + multiple = "any" + ) + + # Spouse's coordinates + spouse_coords <- getRelativeCoordinates( + ped = ped, + connections = extras, + relativeIDvar = "spouseID", + x_name = "x_spouse", + y_name = "y_spouse", + multiple = "all" + ) + + # parent_hash coordinates + parent_hash_coords <- extras |> # need to get mom and dad coordinates + dplyr::left_join(mom_coords, by = c("newID", "personID", "momID")) |> + dplyr::left_join(dad_coords, by = c("newID", "personID", "dadID")) |> + dplyr::left_join( + ped, + by = c("parent_hash"), + suffix = c("", "_sib"), + multiple = "all" + ) |> + dplyr::filter(!is.na(.data$parent_hash)) |> + dplyr::mutate( + x_parent_hash = mean(c( + .data$x_dad, + .data$x_mom + )), + y_parent_hash = mean(c( + .data$y_dad, + .data$y_mom + )) + ) |> + dplyr::select( + .data$newID, + .data$personID, + .data$parent_hash, + .data$couple_hash, + .data$x_parent_hash, + .data$y_parent_hash + ) + + + # Coordinates of the individual's other appearance ("self") + self_coords <- extras |> + dplyr::left_join( + ped, + by = c("personID"), + suffix = c("", "_other"), + # relationship = relationship, + multiple = "all" + ) |> + dplyr::filter(.data$newID != .data$newID_other) |> + dplyr::mutate( + x_otherself = .data$x_pos_other, + y_otherself = .data$y_pos_other + ) |> + dplyr::select( + .data$newID, + .data$personID, + .data$newID_other, + .data$x_otherself, + .data$y_otherself + ) + + # ----- + # Merge coordinates into the extra rows + # ----- + + extras <- extras |> + dplyr::left_join(mom_coords, by = c("newID", "personID", "momID")) |> + dplyr::left_join(dad_coords, by = c("newID", "personID", "dadID")) |> + dplyr::left_join(self_coords, by = c("newID", "personID")) |> + dplyr::left_join(spouse_coords, + by = c("newID", "personID", "spouseID"), + multiple = "all" + ) |> + dplyr::left_join(parent_hash_coords, + by = c("newID", "personID", "parent_hash"), + multiple = "all" + ) + +#print(extras) + # ----- + # Compute Euclidean distances between this appearance and: + # - mom, dad, spouse + # - same individual in other location (otherself) + # These will be used to choose the "closest" relationship. + # minkowski distance could be used here as well aka "city block" distance + # ----- + extras <- extras |> + dplyr::mutate( + dist_mom = computeDistance(method = "cityblock", + x1 = .data$x_pos, + y1 = .data$y_pos, + x2 = .data$x_mom, + y2 = .data$y_mom), + + dist_mom_other = computeDistance(method = "cityblock", + x1 = .data$x_otherself, + y1 = .data$y_otherself, + x2 = .data$x_mom, + y2 = .data$y_mom), + dist_dad = computeDistance(method = "cityblock", + x1 = .data$x_pos, + y1 = .data$y_pos, + x2 = .data$x_dad, + y2 = .data$y_dad), + dist_dad_other = computeDistance(method = "cityblock", + x1 = .data$x_otherself, + y1 = .data$y_otherself, + x2 = .data$x_dad, + y2 = .data$y_dad), + # spouse distance + dist_spouse = computeDistance(method = "cityblock", + x1 = .data$x_pos, + y1 = .data$y_pos, + x2 = .data$x_spouse, + y2 = .data$y_spouse), + dist_spouse_other = computeDistance(method = "cityblock", + x1 = .data$x_otherself, + y1 = .data$y_otherself, + x2 = .data$x_spouse, + y2 = .data$y_spouse), + dist_otherself = computeDistance(method = "cityblock", + x1 = .data$x_pos, + y1 = .data$y_pos, + x2 = .data$x_otherself, + y2 = .data$y_otherself), + dist_parent_hash = computeDistance(method = "cityblock", + x1 = .data$x_pos, + y1 = .data$y_pos, + x2 = .data$x_parent_hash, + y2 = .data$y_parent_hash), + dist_parent_hash_other = computeDistance(method = "cityblock", + x1 = .data$x_otherself, + y1 = .data$y_otherself, + x2 = .data$x_parent_hash, + y2 = .data$y_parent_hash) + + + ) + + + # ----- + # When there are multiple spouses, keep only the appearance + # where the individual is closest to one of their spouses. + # ----- + +# extras <- extras |> +# dplyr::group_by(.data$newID, .data$personID) |> +# dplyr::mutate( +# min_spouse = min(.data$dist_spouse, na.rm = TRUE), +# num_spouse = dplyr::n() +# ) |> +# dplyr::ungroup() +# extras <- extras |> +# dplyr::filter(.data$num_spouse == 1 | .data$dist_spouse == .data$min_spouse) |> +# dplyr::select( +# -.data$min_spouse, +# -.data$num_spouse +# ) + + + # ----- + # Determine the "closest relative" to this duplicated row + # ----- + + + + # For each duplicated appearance, we now ask: + # - Is this appearance closer to mom than the otherself copy is? + # - Same for dad? For spouse? + + extras <- extras |> + dplyr::mutate( + mom_closer = dplyr::case_when( + .data$dist_mom < .data$dist_mom_other ~ TRUE, + .data$dist_mom_other < .data$dist_mom ~ FALSE, + .data$dist_mom == .data$dist_mom_other & + .data$newID < .data$newID_other ~ TRUE, + TRUE ~ FALSE + ), + dad_closer = dplyr::case_when( + .data$dist_dad < .data$dist_dad_other ~ TRUE, + .data$dist_dad_other < .data$dist_dad ~ FALSE, + .data$dist_dad == .data$dist_dad_other & + .data$newID < .data$newID_other ~ TRUE, + TRUE ~ TRUE + ), + spouse_closer = dplyr::case_when( + .data$dist_spouse < .data$dist_spouse_other ~ TRUE, + .data$dist_spouse_other < .data$dist_spouse ~ FALSE, + .data$dist_spouse == .data$dist_spouse_other & + .data$newID < .data$newID_other ~ TRUE, + + TRUE ~ FALSE + ), + parent_hash_closer = dplyr::case_when( + .data$dist_parent_hash < .data$dist_parent_hash_other ~ TRUE, + .data$dist_parent_hash_other < .data$dist_parent_hash ~ FALSE, + TRUE ~ FALSE + ) + ) + + + # Then: + # - Determine which of mom, dad, or spouse is closest in absolute terms + # - Use that to decide whether to retain connections to that relative + +# extras <- extras |> +# dplyr::mutate( +# closest_relative = dplyr::case_when( +# .data$dist_mom <= .data$dist_dad & .data$dist_mom <= .data$dist_spouse ~ "mom", +# .data$dist_dad < .data$dist_mom & .data$dist_dad <= .data$dist_spouse ~ "dad", +# TRUE ~ "spouse" +# ) +# ) + + # ----- + # Based on which relative is closest, determine which links to keep + # ----- + + extras <- extras |> + dplyr::mutate( + link_as_mom = .data$mom_closer, + link_as_dad = .data$dad_closer, + link_as_spouse = .data$spouse_closer, + link_as_sibling = .data$link_as_mom | .data$link_as_dad +#.data$parent_hash_closer + ) + + ### per‑spouse keeper ---------------------------------------- + extras <- extras |> + dplyr::mutate( + dist_spouse_fix = dplyr::if_else(is.na(.data$dist_spouse), + Inf, .data$dist_spouse) + ) |> + dplyr::group_by(.data$personID) |> + dplyr::mutate( + keep_spouse = (.data$dist_spouse_fix == + min(.data$dist_spouse_fix, na.rm = TRUE)) #& + # (dplyr::row_number() == 1) + ) |> + dplyr::ungroup() |> + dplyr::mutate( + link_as_spouse = .data$link_as_spouse & .data$keep_spouse + ) |> + dplyr::select(-"dist_spouse_fix", -"keep_spouse") |> unique() + + ### END INSERT -------------------------------------------------------------- + + # --- Keep ONE appearance per person for parent / sibling links ---- + extras <- extras |> + dplyr::mutate( + total_parent_dist = dplyr::if_else( + is.na(.data$dist_mom + .data$dist_dad), + Inf, + .data$dist_mom + .data$dist_dad + ) + ) |> + dplyr::group_by(.data$personID) |> + dplyr::mutate( + min_total_parent_dist = min(.data$total_parent_dist, na.rm = TRUE), + keep_links = (.data$total_parent_dist == .data$min_total_parent_dist)# & + # (dplyr::row_number() == 1) + ) |> + dplyr::ungroup() |> + dplyr::mutate( + link_as_mom = .data$link_as_mom & .data$keep_links, + link_as_dad = .data$link_as_dad & .data$keep_links, + link_as_sibling = .data$link_as_sibling & .data$keep_links + ) |> + dplyr::select( + -"total_parent_dist", + -"min_total_parent_dist", + -"keep_links" + ) |> dplyr::mutate( + link_any = dplyr::case_when( + .data$link_as_mom == TRUE | .data$link_as_dad == TRUE | + .data$link_as_sibling == TRUE | .data$link_as_spouse == TRUE ~ TRUE, + TRUE ~ FALSE + ) + ) + + + # ----- + # Final subset of relevant decision columns + # ----- + + skinnyextras <- extras |> + dplyr::select( + .data$newID, + .data$link_as_dad, + .data$link_as_mom, + .data$link_as_spouse, + .data$link_as_sibling, + .data$link_any, + .data$x_otherself, + .data$y_otherself + ) + + + # ----- + # Apply decisions to main pedigree + # Removes connection references for non-kept parents/spouses + # ----- + + ped <- ped |> + dplyr::left_join(skinnyextras, + by = c("newID"), suffix = c("", "_")#, + # relationship = "one-to-one" + ) |> + dplyr::select( + -"newID" + ) |> + # set the connection columns to TRUE if not kept + dplyr::mutate( + link_as_mom = dplyr::case_when( + is.na(.data$link_as_mom) ~ TRUE, + .data$link_as_mom == TRUE ~ TRUE, + .data$link_as_mom == FALSE ~ FALSE + ), + link_as_dad = dplyr::case_when( + is.na(.data$link_as_dad) ~ TRUE, + .data$link_as_dad == TRUE ~ TRUE, + .data$link_as_dad == FALSE ~ FALSE + ), + link_as_spouse = dplyr::case_when( + .data$link_as_spouse == FALSE ~ FALSE, + is.na(.data$link_as_spouse) ~ TRUE, + .data$link_as_spouse == TRUE ~ TRUE), + link_as_sibling = dplyr::case_when( + is.na(.data$link_as_sibling) ~ TRUE, + .data$link_as_sibling == TRUE ~ TRUE, + .data$link_as_sibling == FALSE ~ FALSE), + link_any = dplyr::case_when( + is.na(.data$link_any) ~ TRUE, + .data$link_any == TRUE ~ TRUE, + .data$link_any == FALSE ~ FALSE) + ) |> filter( + .data$link_any == TRUE + ) + + + + return(ped) +} diff --git a/man/calculateConnections.Rd b/man/calculateConnections.Rd index 8f334678..6ec2293e 100644 --- a/man/calculateConnections.Rd +++ b/man/calculateConnections.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/calcCoordinates.R +% Please edit documentation in R/calcConnections.R \name{calculateConnections} \alias{calculateConnections} \title{Calculate connections for a pedigree dataset} diff --git a/man/computeDistance.Rd b/man/computeDistance.Rd new file mode 100644 index 00000000..e42f244a --- /dev/null +++ b/man/computeDistance.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/calcConnectionsHelpers.R +\name{computeDistance} +\alias{computeDistance} +\title{Compute distance between two points} +\usage{ +computeDistance(x1, y1, x2, y2, method = "euclidean", p = NULL) +} +\arguments{ +\item{x1}{Numeric. X-coordinate of the first point.} + +\item{y1}{Numeric. Y-coordinate of the first point.} + +\item{x2}{Numeric. X-coordinate of the second point.} + +\item{y2}{Numeric. Y-coordinate of the second point.} + +\item{method}{Character. Method of distance calculation. Options are "euclidean", "cityblock", and "Minkowski".} + +\item{p}{Numeric. The order of the Minkowski distance. If NULL, defaults to 2 for Euclidean and 1 for Manhattan. If +Minkowski method is used, p should be specified.} +} +\description{ +This function calculates the distance between two points in a 2D space using +Minkowski distance. It can be used to compute Euclidean or Manhattan distance. +It is a utility function for calculating distances in pedigree layouts. +Defaults to Euclidean distance if no method is specified. +} diff --git a/man/getMidpoints.Rd b/man/getMidpoints.Rd index 1e74d3f0..f1d7b2eb 100644 --- a/man/getMidpoints.Rd +++ b/man/getMidpoints.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/calcCoordinates.R +% Please edit documentation in R/calcConnectionsHelpers.R \name{getMidpoints} \alias{getMidpoints} \title{Compute midpoints across grouped coordinates} diff --git a/man/getRelativeCoordinates.Rd b/man/getRelativeCoordinates.Rd index 60a7da73..b280cc0c 100644 --- a/man/getRelativeCoordinates.Rd +++ b/man/getRelativeCoordinates.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/calcCoordinates.R +% Please edit documentation in R/calcConnectionsHelpers.R \name{getRelativeCoordinates} \alias{getRelativeCoordinates} \title{Get coordinate positions of relatives for each individual} @@ -11,7 +11,8 @@ getRelativeCoordinates( x_name, y_name, personID = "personID", - multiple = "all" + multiple = "all", + only_unique = TRUE ) } \arguments{ @@ -28,6 +29,8 @@ getRelativeCoordinates( \item{personID}{Character string specifying the column name for individual IDs.} \item{multiple}{Character. Specifies how to handle multiple matches. Options are "all" or "any".} + +\item{only_unique}{Logical. If TRUE, return only unique rows. Defaults to TRUE.} } \value{ A `data.frame` with columns: diff --git a/man/ggPedigree.Rd b/man/ggPedigree.Rd index 0d6664d9..edc38456 100644 --- a/man/ggPedigree.Rd +++ b/man/ggPedigree.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ggPedigree.R +% Please edit documentation in R/ggpedigree.R \name{ggPedigree} \alias{ggPedigree} \alias{ggpedigree} @@ -13,7 +13,10 @@ ggPedigree( momID = "momID", dadID = "dadID", status_col = NULL, - config = list() + config = list(), + debug = FALSE, + hints = NULL, + ... ) ggpedigree( @@ -23,7 +26,10 @@ ggpedigree( momID = "momID", dadID = "dadID", status_col = NULL, - config = list() + config = list(), + debug = FALSE, + hints = NULL, + ... ) ggped( @@ -33,7 +39,10 @@ ggped( momID = "momID", dadID = "dadID", status_col = NULL, - config = list() + config = list(), + debug = FALSE, + hints = NULL, + ... ) } \arguments{ @@ -53,8 +62,8 @@ ggped( \describe{ \item{code_male}{Integer or string. Value identifying males in the sex column. (typically 0 or 1) Default: 1.} \item{spouse_segment_color, self_segment_color, sibling_segment_color, parent_segment_color, offspring_segment_color}{Character. Line colors for respective connection types.} - \item{text_size, point_size, line_width}{Numeric. Controls text size, point size, and line thickness.} - \item{generation_gap}{Numeric. Vertical spacing multiplier between generations. Default: 1.} + \item{label_text_size, point_size, line_width}{Numeric. Controls text size, point size, and line thickness.} + \item{generation_height}{Numeric. Vertical spacing multiplier between generations. Default: 1.} \item{unknown_shape, female_shape, male_shape, affected_shape}{Integers. Shape codes for plotting each group.} \item{sex_shape_labs}{Character vector of labels for the sex variable. (default: c("Female", "Male", "Unknown")} \item{unaffected, affected}{Values indicating unaffected/affected status.} @@ -62,6 +71,12 @@ ggped( \item{max_overlaps}{Maximum number of overlaps allowed in repelled labels.} \item{id_segment_color}{Color used for label connector lines.} }} + +\item{debug}{Logical. If TRUE, prints debugging information. Default: FALSE.} + +\item{hints}{Data frame with hints for layout adjustments. Default: NULL.} + +\item{...}{Additional arguments passed to `ggplot2` functions.} } \value{ A `ggplot` object rendering the pedigree diagram. diff --git a/man/processExtras.Rd b/man/processExtras.Rd index 57183be4..ab0ec543 100644 --- a/man/processExtras.Rd +++ b/man/processExtras.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/calcCoordinates.R +% Please edit documentation in R/processExtras.R \name{processExtras} \alias{processExtras} \title{Process duplicate appearances of individuals in a pedigree layout} diff --git a/tests/testthat/test-calcCoordinates.R b/tests/testthat/test-calcCoordinates.R index 495f004e..64306503 100644 --- a/tests/testthat/test-calcCoordinates.R +++ b/tests/testthat/test-calcCoordinates.R @@ -61,7 +61,7 @@ test_that("calculateConnections returns expected structure", { "x_mid_sib", "y_mid_sib" ) - expect_true(all(expected_cols %in% names(conns))) + expect_true(all(expected_cols %in% names(conns$connections))) }) @@ -120,3 +120,18 @@ test_that("getRelativeCoordinates returns expected coordinates for mother", { expect_false("A" %in% mom_coords$personID) expect_false("B" %in% mom_coords$personID) }) + + +test_that("broken hints doesn't cause a fatal error", { + library(BGmisc) + data("potter") + + # Test with hints + expect_warning( + ggPedigree(potter, + famID = "famID", + personID = "personID", + config = list(hints = TRUE) + ) + ) +}) diff --git a/vignettes/plots.R b/vignettes/plots.R index 37029abb..b633353e 100644 --- a/vignettes/plots.R +++ b/vignettes/plots.R @@ -62,7 +62,7 @@ p <- ggPedigree( p ## ----------------------------------------------------------------------------- -p <- ggPedigree( +ggPedigree( hazard, famID = "famID", personID = "ID", @@ -75,8 +75,6 @@ p <- ggPedigree( ) ) -p - ## ----------------------------------------------------------------------------- p + facet_wrap(~famID, scales = "free_x") @@ -100,3 +98,136 @@ p + labels = c("Female", "Male", "Unknown") ) +## ----message=FALSE, warning=FALSE--------------------------------------------- +library(BGmisc) # helper utilities & example data + +data("inbreeding") + +df <- inbreeding + +#df <- dplyr::filter(df, famID %in% c(5, 7)) + + +p <- ggPedigree( + df, + famID = "famID", + personID = "ID", + status_col = "proband", +# debug = TRUE, + config = list( + code_male = 0, + sex_color = F, + # label_method = "geom_text", + affected = TRUE, + unaffected = FALSE, + generation_height = 2, + generation_width = 1, + affected_shape = 4, + spouse_segment_color = "pink", + sibling_segment_color = "blue", + parent_segment_color = "green", + offspring_segment_color = "black" + ) +) + +# p$connections%>%filter(personID ==60) %>% nrow() +# p$connections%>%filter(personID ==66) %>% unique() +# p$connections%>%filter(personID ==65) %>% unique() + +# p$connections%>%filter(personID >=61 & +# personID <62 ) %>% unique() + +p + facet_wrap(~famID, scales= "free") #+ scale_color_viridis( + # discrete = TRUE, + # labels = c("TRUE", "FALSE") +# ) + theme_bw(base_size = 14) + guides(colour="none", shape="none") + +## ----------------------------------------------------------------------------- +library(tibble) + +pedigree_df <- tribble( + ~personID, ~momID, ~dadID, ~sex, ~famID, + 10011, NA, NA, 0, 1, + 10012, NA, NA, 1, 1, + 10021, NA, NA, 1, 1, + 10022, 10011, 10012, 1, 1, + 10023, 10011, 10012, 0, 1, + 10024, NA, NA, 0, 1, + 10025, NA, NA, 0, 1, + 10026, 10011, 10012, 0, 1, + 10027, 10011, 10012, 1, 1, + 10031, 10023, 10021, 0, 1, + 10032, 10023, 10021, 1, 1, + 10033, 10023, 10021, 1, 1, + 10034, 10023, 10021, 1, 1, + 10035, 10023, 10021, 0, 1, + 10036, 10024, 10022, 1, 1, + 10037, 10024, 10022, 0, 1, + 10038, 10025, 10027, 1, 1, + 10039, 10025, 10027, 0, 1, + 10310, 10025, 10027, 1, 1, + 10311, 10025, 10027, 1, 1, + 10312, 10025, 10027, 0, 1, + 10011, NA, NA, 0, 2, + 10012, NA, NA, 1, 2, + 10021, NA, NA, 0, 2, + 10022, 10011, 10012, 0, 2, + 10023, 10011, 10012, 1, 2, + 10024, 10011, 10012, 1, 2, + 10025, NA, NA, 1, 2, + 10026, 10011, 10012, 0, 2, + 10027, NA, NA, 1, 2, + 10031, 10021, 10023, 1, 2, + 10032, 10021, 10023, 0, 2, + 10033, 10021, 10023, 1, 2, + 10034, 10022, 10025, 0, 2, + 10035, 10022, 10025, 0, 2, + 10036, 10022, 10025, 1, 2, + 100310, 10022, 10025, 1, 2, + 10037, 10026, 10027, 0, 2, + 10038, 10026, 10027, 0, 2, + 10039, 10026, 10027, 0, 2, + 100311, 10026, 10027, 1, 2, + 100312, 10026, 10027, 1, 2 +) %>% mutate (proband = TRUE) + +#pedigree_df <- recodeSex(pedigree_df,code_male = 1, recode_male = "M") +pedigree_df$personID[pedigree_df$famID == 1] <- pedigree_df$personID[pedigree_df$famID == 1]-10000 +pedigree_df$momID[pedigree_df$famID == 1] <- pedigree_df$momID[pedigree_df$famID == 1]-10000 +pedigree_df$dadID[pedigree_df$famID == 1] <- pedigree_df$dadID[pedigree_df$famID == 1]-10000 + + + +p <- ggPedigree( + pedigree_df, + famID = "famID", + personID = "personID", + status_col = "proband", +# debug = TRUE, + config = list( + code_male = 1, + sex_color = F, + apply_default_scales = FALSE, + label_method = "geom_text", + # affected = TRUE, + # unaffected = FALSE, + generation_height = 1, + generation_width = 1, + affected_shape = 4, + spouse_segment_color = "black", + sibling_segment_color = "black", + parent_segment_color = "black", + offspring_segment_color = "black" + ) +) + +p + scale_shape_manual( + values = c(16, 15, 15), + labels = c("Female", "Male", "Unknown") + ) + + guides(colour="none", shape="none") + # guides(colour="none", shape="none") + +#facet_wrap(~famID, scales= "free") + + + diff --git a/vignettes/plots.Rmd b/vignettes/plots.Rmd index 9b5c74b7..7363aaa8 100644 --- a/vignettes/plots.Rmd +++ b/vignettes/plots.Rmd @@ -134,8 +134,6 @@ ggPedigree( unaffected = FALSE ) ) - - ``` # Multiple families in one graphic @@ -170,3 +168,141 @@ p + labels = c("Female", "Male", "Unknown") ) ``` + +# More complex examples + +```{r message=FALSE, warning=FALSE} +library(BGmisc) # helper utilities & example data + +data("inbreeding") + +df <- inbreeding + +#df <- dplyr::filter(df, famID %in% c(5, 7)) + + +p <- ggPedigree( + df, + famID = "famID", + personID = "ID", + status_col = "proband", +# debug = TRUE, + config = list( + code_male = 0, + sex_color = F, + # label_method = "geom_text", + affected = TRUE, + unaffected = FALSE, + generation_height = 2, + generation_width = 1, + affected_shape = 4, + spouse_segment_color = "pink", + sibling_segment_color = "blue", + parent_segment_color = "green", + offspring_segment_color = "black" + ) +) + +# p$connections%>%filter(personID ==60) %>% nrow() +# p$connections%>%filter(personID ==66) %>% unique() +# p$connections%>%filter(personID ==65) %>% unique() + +# p$connections%>%filter(personID >=61 & +# personID <62 ) %>% unique() + +p + facet_wrap(~famID, scales= "free") #+ scale_color_viridis( + # discrete = TRUE, + # labels = c("TRUE", "FALSE") +# ) + theme_bw(base_size = 14) + guides(colour="none", shape="none") +``` + +# Power paper +```{r} +library(tibble) + +pedigree_df <- tribble( + ~personID, ~momID, ~dadID, ~sex, ~famID, + 10011, NA, NA, 0, 1, + 10012, NA, NA, 1, 1, + 10021, NA, NA, 1, 1, + 10022, 10011, 10012, 1, 1, + 10023, 10011, 10012, 0, 1, + 10024, NA, NA, 0, 1, + 10025, NA, NA, 0, 1, + 10026, 10011, 10012, 0, 1, + 10027, 10011, 10012, 1, 1, + 10031, 10023, 10021, 0, 1, + 10032, 10023, 10021, 1, 1, + 10033, 10023, 10021, 1, 1, + 10034, 10023, 10021, 1, 1, + 10035, 10023, 10021, 0, 1, + 10036, 10024, 10022, 1, 1, + 10037, 10024, 10022, 0, 1, + 10038, 10025, 10027, 1, 1, + 10039, 10025, 10027, 0, 1, + 10310, 10025, 10027, 1, 1, + 10311, 10025, 10027, 1, 1, + 10312, 10025, 10027, 0, 1, + 10011, NA, NA, 0, 2, + 10012, NA, NA, 1, 2, + 10021, NA, NA, 0, 2, + 10022, 10011, 10012, 0, 2, + 10023, 10011, 10012, 1, 2, + 10024, 10011, 10012, 1, 2, + 10025, NA, NA, 1, 2, + 10026, 10011, 10012, 0, 2, + 10027, NA, NA, 1, 2, + 10031, 10021, 10023, 1, 2, + 10032, 10021, 10023, 0, 2, + 10033, 10021, 10023, 1, 2, + 10034, 10022, 10025, 0, 2, + 10035, 10022, 10025, 0, 2, + 10036, 10022, 10025, 1, 2, + 100310, 10022, 10025, 1, 2, + 10037, 10026, 10027, 0, 2, + 10038, 10026, 10027, 0, 2, + 10039, 10026, 10027, 0, 2, + 100311, 10026, 10027, 1, 2, + 100312, 10026, 10027, 1, 2 +) %>% mutate (proband = TRUE) + +#pedigree_df <- recodeSex(pedigree_df,code_male = 1, recode_male = "M") +pedigree_df$personID[pedigree_df$famID == 1] <- pedigree_df$personID[pedigree_df$famID == 1]-10000 +pedigree_df$momID[pedigree_df$famID == 1] <- pedigree_df$momID[pedigree_df$famID == 1]-10000 +pedigree_df$dadID[pedigree_df$famID == 1] <- pedigree_df$dadID[pedigree_df$famID == 1]-10000 + + + +p <- ggPedigree( + pedigree_df, + famID = "famID", + personID = "personID", + status_col = "proband", +# debug = TRUE, + config = list( + code_male = 1, + sex_color = F, + apply_default_scales = FALSE, + label_method = "geom_text", + # affected = TRUE, + # unaffected = FALSE, + generation_height = 1, + generation_width = 1, + affected_shape = 4, + spouse_segment_color = "black", + sibling_segment_color = "black", + parent_segment_color = "black", + offspring_segment_color = "black" + ) +) + +p + scale_shape_manual( + values = c(16, 15, 15), + labels = c("Female", "Male", "Unknown") + ) + + guides(colour="none", shape="none") + # guides(colour="none", shape="none") + +#facet_wrap(~famID, scales= "free") + + +``` diff --git a/vignettes/plots.html b/vignettes/plots.html index 8a990059..202fe542 100644 --- a/vignettes/plots.html +++ b/vignettes/plots.html @@ -369,7 +369,7 @@
ggPedigree() automatically:
reshapes the data by family (ped2fam()),
Because the result is just a ggplot object, regular layering applies:
ggPedigree(potter,
@@ -407,7 +407,7 @@ Customizing the plot
personID = "personID"
) +
theme_bw(base_size = 12)If you set sex_color to FALSE, the affected values will be filled with the default color palette.
-p <- ggPedigree(
+ggPedigree(
hazard,
famID = "famID",
personID = "ID",
@@ -456,10 +456,8 @@ Additional customization
affected = TRUE,
unaffected = FALSE
)
-)
-
-p
-
+)library(BGmisc) # helper utilities & example data
+
+data("inbreeding")
+
+df <- inbreeding
+
+#df <- dplyr::filter(df, famID %in% c(5, 7))
+
+
+p <- ggPedigree(
+ df,
+ famID = "famID",
+ personID = "ID",
+ status_col = "proband",
+# debug = TRUE,
+ config = list(
+ code_male = 0,
+ sex_color = F,
+ # label_method = "geom_text",
+ affected = TRUE,
+ unaffected = FALSE,
+ generation_height = 2,
+ generation_width = 1,
+ affected_shape = 4,
+ spouse_segment_color = "pink",
+ sibling_segment_color = "blue",
+ parent_segment_color = "green",
+ offspring_segment_color = "black"
+ )
+)
+
+# p$connections%>%filter(personID ==60) %>% nrow()
+# p$connections%>%filter(personID ==66) %>% unique()
+# p$connections%>%filter(personID ==65) %>% unique()
+
+# p$connections%>%filter(personID >=61 &
+# personID <62 ) %>% unique()
+
+p + facet_wrap(~famID, scales= "free") #+ scale_color_viridis(library(tibble)
+
+pedigree_df <- tribble(
+ ~personID, ~momID, ~dadID, ~sex, ~famID,
+ 10011, NA, NA, 0, 1,
+ 10012, NA, NA, 1, 1,
+ 10021, NA, NA, 1, 1,
+ 10022, 10011, 10012, 1, 1,
+ 10023, 10011, 10012, 0, 1,
+ 10024, NA, NA, 0, 1,
+ 10025, NA, NA, 0, 1,
+ 10026, 10011, 10012, 0, 1,
+ 10027, 10011, 10012, 1, 1,
+ 10031, 10023, 10021, 0, 1,
+ 10032, 10023, 10021, 1, 1,
+ 10033, 10023, 10021, 1, 1,
+ 10034, 10023, 10021, 1, 1,
+ 10035, 10023, 10021, 0, 1,
+ 10036, 10024, 10022, 1, 1,
+ 10037, 10024, 10022, 0, 1,
+ 10038, 10025, 10027, 1, 1,
+ 10039, 10025, 10027, 0, 1,
+ 10310, 10025, 10027, 1, 1,
+ 10311, 10025, 10027, 1, 1,
+ 10312, 10025, 10027, 0, 1,
+ 10011, NA, NA, 0, 2,
+ 10012, NA, NA, 1, 2,
+ 10021, NA, NA, 0, 2,
+ 10022, 10011, 10012, 0, 2,
+ 10023, 10011, 10012, 1, 2,
+ 10024, 10011, 10012, 1, 2,
+ 10025, NA, NA, 1, 2,
+ 10026, 10011, 10012, 0, 2,
+ 10027, NA, NA, 1, 2,
+ 10031, 10021, 10023, 1, 2,
+ 10032, 10021, 10023, 0, 2,
+ 10033, 10021, 10023, 1, 2,
+ 10034, 10022, 10025, 0, 2,
+ 10035, 10022, 10025, 0, 2,
+ 10036, 10022, 10025, 1, 2,
+ 100310, 10022, 10025, 1, 2,
+ 10037, 10026, 10027, 0, 2,
+ 10038, 10026, 10027, 0, 2,
+ 10039, 10026, 10027, 0, 2,
+ 100311, 10026, 10027, 1, 2,
+ 100312, 10026, 10027, 1, 2
+) %>% mutate (proband = TRUE)
+
+#pedigree_df <- recodeSex(pedigree_df,code_male = 1, recode_male = "M")
+pedigree_df$personID[pedigree_df$famID == 1] <- pedigree_df$personID[pedigree_df$famID == 1]-10000
+pedigree_df$momID[pedigree_df$famID == 1] <- pedigree_df$momID[pedigree_df$famID == 1]-10000
+pedigree_df$dadID[pedigree_df$famID == 1] <- pedigree_df$dadID[pedigree_df$famID == 1]-10000
+
+
+
+p <- ggPedigree(
+ pedigree_df,
+ famID = "famID",
+ personID = "personID",
+ status_col = "proband",
+# debug = TRUE,
+ config = list(
+ code_male = 1,
+ sex_color = F,
+ apply_default_scales = FALSE,
+ label_method = "geom_text",
+ # affected = TRUE,
+ # unaffected = FALSE,
+ generation_height = 1,
+ generation_width = 1,
+ affected_shape = 4,
+ spouse_segment_color = "black",
+ sibling_segment_color = "black",
+ parent_segment_color = "black",
+ offspring_segment_color = "black"
+ )
+)
+
+p + scale_shape_manual(
+ values = c(16, 15, 15),
+ labels = c("Female", "Male", "Unknown")
+ ) +
+ guides(colour="none", shape="none")