From 682df518f34d3725093d1f30ac0f39280feb72f5 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Thu, 17 Apr 2025 22:32:31 -0400 Subject: [PATCH 01/11] graph tracing wip --- R/calculateFamilySize.R | 9 +- R/readWikifamilytree.R | 173 ++++++++++++++++++++++++----- tests/testthat/test-readWikiTree.R | 54 +++++++++ 3 files changed, 205 insertions(+), 31 deletions(-) diff --git a/R/calculateFamilySize.R b/R/calculateFamilySize.R index 06cc3f16..c346cb66 100644 --- a/R/calculateFamilySize.R +++ b/R/calculateFamilySize.R @@ -1,4 +1,4 @@ -#' allGens +#' calcAllGens #' A function to calculate the number of individuals in each generation. This is a supporting function for \code{simulatePedigree}. #' @param kpc Number of kids per couple (integer >= 2). #' @param Ngen Number of generations (integer >= 1). @@ -24,10 +24,9 @@ calcAllGens <- function(kpc, Ngen, marR) { return(allGens) } #' @rdname calcAllGens -#' @export allGens <- calcAllGens -#' sizeAllGens +#' calcFamilySizeByGen #' An internal supporting function for \code{simulatePedigree}. #' @inheritParams calcAllGens #' @return Returns a vector including the number of individuals in every generation. @@ -49,10 +48,9 @@ calcFamilySizeByGen <- function(kpc, Ngen, marR) { return(allGens) } #' @rdname calcFamilySizeByGen -#' @export sizeAllGens <- calcFamilySizeByGen -#' famSizeCal +#' calcFamilySize #' A function to calculate the total number of individuals in a pedigree given parameters. This is a supporting function for function \code{simulatePedigree} #' @inheritParams calcAllGens #' @return Returns a numeric value indicating the total pedigree size. @@ -77,6 +75,5 @@ calcFamilySize <- function(kpc, Ngen, marR) { } #' @rdname calcFamilySize -#' @export #' famSizeCal <- calcFamilySize diff --git a/R/readWikifamilytree.R b/R/readWikifamilytree.R index fc0a7521..a59ea7c3 100644 --- a/R/readWikifamilytree.R +++ b/R/readWikifamilytree.R @@ -148,6 +148,10 @@ parseTree <- function(tree_lines) { return(tree_df) } + + + + #' infer relationship from tree template #' #' @param tree_long A data frame containing the tree structure in long format. @@ -155,43 +159,162 @@ parseTree <- function(tree_lines) { #' @keywords internal #' parseRelationships <- function(tree_long) { + +traced <- traceTreePaths(tree_long, deduplicate = FALSE) + + # Initialize relationships data frame relationships <- data.frame( - id = tree_long$id, + id = tree_long$id[!is.na(tree_long$id)], momID = NA_character_, dadID = NA_character_, + parent_1 = NA_character_, + parent_2 = NA_character_, spouseID = NA_character_, stringsAsFactors = FALSE ) - # Loop through rows to find connections - for (i in seq_len(nrow(tree_long))) { - row <- tree_long[i, ] - - # **Parent-Child Detection** - if (row$Value == "y") { - parent <- tree_long$Value[tree_long$Row == row$Row - 1 & tree_long$Column == row$Column] - child <- tree_long$Value[tree_long$Row == row$Row + 1 & tree_long$Column == row$Column] - - if (length(parent) == 0) parent <- NA - if (length(child) == 0) child <- NA - # Assign mom/dad IDs based on tree structure - if (!is.na(parent) && !is.na(child)) { - relationships$momID[relationships$id == child] <- parent - relationships$dadID[relationships$id == child] <- parent # Assuming one parent detected for now + traced <- traced[!is.na(traced$from_id) & !is.na(traced$to_id), ] + + + # > traced +# from_id to_id path_length intermediates intermediate_values +# A B 2 1_2 + +# A C 5 1_2;2_2;3_2;4_2 +|y| +# B C 5 1_2;2_2;3_2;4_2 +|y| + # Fill in relationships based on the tree structure + traced$relationship <- NA_character_ + + + # if intermediate values is "+", the relationship is spouse + + + traced$relationship[traced$intermediate_values=="+"] <- "spouse" + + #if intermediate value contains "y", the relationship is parent-child + + traced$relationship[traced$intermediate_values=="+|y|"] <- "parent-child" + traced$relationship[traced$intermediate_values=="|y|+"] <- "child-parent" + + + + return(relationships) +} + + + +#' Trace paths between individuals in a family tree grid +#' +#' @param tree_long A data.frame with columns: Row, Column, Value, id +#' @param deduplicate Logical, if TRUE, will remove duplicate paths +#' @return A data.frame with columns: from_id, to_id, direction, path_length, intermediates +#' @export +traceTreePaths <- function(tree_long, deduplicate = TRUE) { + # Keep only relevant cells (people and path symbols) + path_symbols <- c("|", "-", "+", "v", "^", "y", ",", ".", "`", "!") + tree_long$Value <- gsub("\\s+", "", tree_long$Value) # Remove whitespace + active_cells <- tree_long[!is.na(tree_long$Value) & + (tree_long$Value %in% path_symbols | !is.na(tree_long$id)), ] + + active_cells$key <- paste(active_cells$Row, active_cells$Column, sep = "_") + + + edges <- do.call(rbind, lapply(seq_len(nrow(active_cells)), function(i) { + from_key <- active_cells$key[i] + to_keys <- findNeighbors(active_cells[i, ], + active_keys=active_cells$key) + if (length(to_keys) > 0) { + data.frame(from = from_key, to = to_keys, stringsAsFactors = FALSE) + } + })) + + # Create graph + g <- igraph::graph_from_data_frame(edges, directed = FALSE) + + # Map keys to IDs + person_nodes <- active_cells[!is.na(active_cells$id), c("key", "id")] + id_map <- setNames(person_nodes$id, person_nodes$key) + + # Find all pairs of people and trace paths + person_keys <- names(id_map) + result <- data.frame() + + for (i in seq_along(person_keys)) { + for (j in seq_along(person_keys)) { + if (i == j) next + from_key <- person_keys[i] + to_key <- person_keys[j] + + sp <- suppressWarnings(igraph::shortest_paths(g, from_key, to_key, output = "vpath")$vpath[[1]]) + if (length(sp) > 1) { + intermediate <- setdiff(names(sp), c(from_key, to_key)) + # Extract values at those intermediate keys + intermediate_values <- sapply(intermediate, function(k) { + cell <- active_cells[active_cells$key == k, ] + if (nrow(cell) > 0) cell$Value else NA + }) + result <- rbind(result, data.frame( + from_id = id_map[[from_key]], + to_id = id_map[[to_key]], + path_length = length(sp) - 1, + intermediates = paste(intermediate, collapse = ";"), + intermediate_values = paste(intermediate_values, collapse = ""), + stringsAsFactors = FALSE + ))} else { + # If no path found, add a row with NA values + result <- rbind(result, data.frame( + from_id = id_map[[from_key]], + to_id = id_map[[to_key]], + path_length = NA, + intermediates = NA, + intermediate_values = NA, + stringsAsFactors = FALSE + )) + } } } - # **Spouse Detection** - if (row$Value == "+") { - spouse1 <- tree_long$Value[tree_long$Row == row$Row & tree_long$Column == row$Column - 1] - spouse2 <- tree_long$Value[tree_long$Row == row$Row & tree_long$Column == row$Column + 1] + if(deduplicate==TRUE){ + # Deduplicate pairs + result <- deduplicatePairs(result) + } - if (!is.na(spouse1) && !is.na(spouse2)) { - relationships$spouseID[relationships$id == spouse1] <- spouse2 - relationships$spouseID[relationships$id == spouse2] <- spouse1 - } + return(result) +} + +#' Build adjacency list (4-way neighbors) +#' +#' @param cell A data frame with columns Row and Column +#' @return A character vector of neighboring cell keys +#' @keywords internal + +findNeighbors <- function(cell,active_keys) { + offsets <- list(c(1, 0), c(-1, 0), c(0, 1), c(0, -1)) # down, up, right, left + out <- character() + for (offset in offsets) { + r2 <- cell$Row + offset[1] + c2 <- cell$Column + offset[2] + key2 <- paste(r2, c2, sep = "_") + if (key2 %in% active_keys) { + out <- c(out, key2) } } + return(out) +} - return(relationships) +#' Deduplicate pairs of IDs in a data frame +#' +#' @param df A data frame with columns from_id and to_id +#' @return A data frame with unique pairs of IDs +#' @export +deduplicatePairs <- function(df) { + # Create a new column with sorted pairs + df$pair <- apply(df[, c("from_id", "to_id")], 1, function(x) paste(sort(x), collapse = "_")) + + # Remove duplicates based on the pair column + df_dedup <- df[!duplicated(df$pair), ] + + # Drop the pair column + df_dedup$pair <- NULL + + return(df_dedup) } diff --git a/tests/testthat/test-readWikiTree.R b/tests/testthat/test-readWikiTree.R index 166539d9..03067c2c 100644 --- a/tests/testthat/test-readWikiTree.R +++ b/tests/testthat/test-readWikiTree.R @@ -1,5 +1,59 @@ # readWikifamilytree + +test_that("traceTreePaths works correctly for horizontal tree", { + # Create a mock tree_horizontal data frame + # This is a simplified version of the tree_horizontal data frame + # with Row, Column, Value, and id columns + # The id column is used to identify the nodes in the tree + +tree_horizontal <- data.frame( + Row = rep(1, 3), + Column = 1:3, + Value = c("A", "+", "B"), + stringsAsFactors = FALSE +) +tree_horizontal$id <- NA +tree_horizontal$id[tree_horizontal$Value %in% c("A", "B")] <- tree_horizontal$Value[tree_horizontal$Value %in% c("A", "B")] + +result <- traceTreePaths(tree_horizontal) +# Check the result + expect_equal(result$path[1], "A") + expect_equal(result$path[2], "B") + expect_equal(result$path[3], NA) +}) + + +test_that("traceTreePaths works correctly for vertical tree", { + # Create a mock tree_vertical data frame + # This is a simplified version of the tree_vertical data frame + # with Row, Column, Value, and id columns + # The id column is used to identify the nodes in the tree +tree_spouse_child <- data.frame( + Row = c(1, 1, 1, 2, 3, 4, 5), + Column = c(1, 2, 3, 2, 2, 2, 2), + Value = c("A", "+", "B", "|", "y", "|", "C"), + stringsAsFactors = FALSE +) +tree_spouse_child$id <- NA +tree_spouse_child$id[tree_spouse_child$Value %in% c("A", "B", "C")] <- tree_spouse_child$Value[tree_spouse_child$Value %in% c("A", "B", "C")] + +result <- traceTreePaths(tree_spouse_child) + +expect_equal(result$path[1], "A") + expect_equal(result$path[2], "B") + expect_equal(result$path[3], "C") + expect_equal(result$path[4], "y") + expect_equal(result$path[5], NA) +}) + + + + + + + + test_that("readWikifamilytree reads a string correctly", { # Create a temporary WikiFamilyTree file for testing # Example usage From 4886b201dcf5335179123d79bd99f43992bc08b7 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Fri, 18 Apr 2025 23:21:32 -0400 Subject: [PATCH 02/11] docs --- NAMESPACE | 5 ++--- man/calcAllGens.Rd | 4 ++-- man/calcFamilySize.Rd | 4 ++-- man/calcFamilySizeByGen.Rd | 4 ++-- man/deduplicatePairs.Rd | 17 +++++++++++++++++ man/findNeighbors.Rd | 18 ++++++++++++++++++ man/traceTreePaths.Rd | 19 +++++++++++++++++++ 7 files changed, 62 insertions(+), 9 deletions(-) create mode 100644 man/deduplicatePairs.Rd create mode 100644 man/findNeighbors.Rd create mode 100644 man/traceTreePaths.Rd diff --git a/NAMESPACE b/NAMESPACE index 69e23307..67e8e90a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,6 @@ # Generated by roxygen2: do not edit by hand export(SimPed) -export(allGens) export(assignCoupleIDs) export(calcAllGens) export(calcFamilySize) @@ -13,10 +12,10 @@ export(checkSex) export(com2links) export(comp2vech) export(createGenDataFrame) +export(deduplicatePairs) export(dropLink) export(evenInsert) export(extractSummaryText) -export(famSizeCal) export(fitComponentModel) export(identifyComponentModel) export(inferRelatedness) @@ -44,7 +43,6 @@ export(relatedness) export(repairSex) export(resample) export(simulatePedigree) -export(sizeAllGens) export(summariseFamilies) export(summariseMatrilines) export(summarisePatrilines) @@ -53,6 +51,7 @@ export(summarizeFamilies) export(summarizeMatrilines) export(summarizePatrilines) export(summarizePedigrees) +export(traceTreePaths) export(vech) import(data.table) import(kinship2) diff --git a/man/calcAllGens.Rd b/man/calcAllGens.Rd index 66a89c25..35871be7 100644 --- a/man/calcAllGens.Rd +++ b/man/calcAllGens.Rd @@ -3,7 +3,7 @@ \name{calcAllGens} \alias{calcAllGens} \alias{allGens} -\title{allGens +\title{calcAllGens A function to calculate the number of individuals in each generation. This is a supporting function for \code{simulatePedigree}.} \usage{ calcAllGens(kpc, Ngen, marR) @@ -21,6 +21,6 @@ allGens(kpc, Ngen, marR) Returns a vector containing the number of individuals in every generation. } \description{ -allGens +calcAllGens A function to calculate the number of individuals in each generation. This is a supporting function for \code{simulatePedigree}. } diff --git a/man/calcFamilySize.Rd b/man/calcFamilySize.Rd index a0128d0c..dda31d20 100644 --- a/man/calcFamilySize.Rd +++ b/man/calcFamilySize.Rd @@ -3,7 +3,7 @@ \name{calcFamilySize} \alias{calcFamilySize} \alias{famSizeCal} -\title{famSizeCal +\title{calcFamilySize A function to calculate the total number of individuals in a pedigree given parameters. This is a supporting function for function \code{simulatePedigree}} \usage{ calcFamilySize(kpc, Ngen, marR) @@ -21,6 +21,6 @@ famSizeCal(kpc, Ngen, marR) Returns a numeric value indicating the total pedigree size. } \description{ -famSizeCal +calcFamilySize A function to calculate the total number of individuals in a pedigree given parameters. This is a supporting function for function \code{simulatePedigree} } diff --git a/man/calcFamilySizeByGen.Rd b/man/calcFamilySizeByGen.Rd index ae3e5e88..f849339c 100644 --- a/man/calcFamilySizeByGen.Rd +++ b/man/calcFamilySizeByGen.Rd @@ -3,7 +3,7 @@ \name{calcFamilySizeByGen} \alias{calcFamilySizeByGen} \alias{sizeAllGens} -\title{sizeAllGens +\title{calcFamilySizeByGen An internal supporting function for \code{simulatePedigree}.} \usage{ calcFamilySizeByGen(kpc, Ngen, marR) @@ -21,6 +21,6 @@ sizeAllGens(kpc, Ngen, marR) Returns a vector including the number of individuals in every generation. } \description{ -sizeAllGens +calcFamilySizeByGen An internal supporting function for \code{simulatePedigree}. } diff --git a/man/deduplicatePairs.Rd b/man/deduplicatePairs.Rd new file mode 100644 index 00000000..4a042665 --- /dev/null +++ b/man/deduplicatePairs.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/readWikifamilytree.R +\name{deduplicatePairs} +\alias{deduplicatePairs} +\title{Deduplicate pairs of IDs in a data frame} +\usage{ +deduplicatePairs(df) +} +\arguments{ +\item{df}{A data frame with columns from_id and to_id} +} +\value{ +A data frame with unique pairs of IDs +} +\description{ +Deduplicate pairs of IDs in a data frame +} diff --git a/man/findNeighbors.Rd b/man/findNeighbors.Rd new file mode 100644 index 00000000..58de5a20 --- /dev/null +++ b/man/findNeighbors.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/readWikifamilytree.R +\name{findNeighbors} +\alias{findNeighbors} +\title{Build adjacency list (4-way neighbors)} +\usage{ +findNeighbors(cell, active_keys) +} +\arguments{ +\item{cell}{A data frame with columns Row and Column} +} +\value{ +A character vector of neighboring cell keys +} +\description{ +Build adjacency list (4-way neighbors) +} +\keyword{internal} diff --git a/man/traceTreePaths.Rd b/man/traceTreePaths.Rd new file mode 100644 index 00000000..5522ed83 --- /dev/null +++ b/man/traceTreePaths.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/readWikifamilytree.R +\name{traceTreePaths} +\alias{traceTreePaths} +\title{Trace paths between individuals in a family tree grid} +\usage{ +traceTreePaths(tree_long, deduplicate = TRUE) +} +\arguments{ +\item{tree_long}{A data.frame with columns: Row, Column, Value, id} + +\item{deduplicate}{Logical, if TRUE, will remove duplicate paths} +} +\value{ +A data.frame with columns: from_id, to_id, direction, path_length, intermediates +} +\description{ +Trace paths between individuals in a family tree grid +} From 66a68f515716b9a858a56adbb0ddbf3d1f568858 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Sat, 19 Apr 2025 12:16:29 -0400 Subject: [PATCH 03/11] smarter parsinh --- NAMESPACE | 1 - R/readWikifamilytree.R | 110 ++++++++++++++++++++++++++++++-------- man/assignParent.Rd | 22 ++++++++ man/deduplicatePairs.Rd | 1 + man/parseRelationships.Rd | 4 +- 5 files changed, 115 insertions(+), 23 deletions(-) create mode 100644 man/assignParent.Rd diff --git a/NAMESPACE b/NAMESPACE index 67e8e90a..fe31174b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,7 +12,6 @@ export(checkSex) export(com2links) export(comp2vech) export(createGenDataFrame) -export(deduplicatePairs) export(dropLink) export(evenInsert) export(extractSummaryText) diff --git a/R/readWikifamilytree.R b/R/readWikifamilytree.R index a59ea7c3..5a663fb4 100644 --- a/R/readWikifamilytree.R +++ b/R/readWikifamilytree.R @@ -53,8 +53,9 @@ readWikifamilytree <- function(text = NULL, verbose = FALSE, file_path = NULL, . tree_long$DisplayName <- ifelse(!is.na(tree_long$name), tree_long$name, tree_long$Value) # Use name if available # parse relationships and infer them + tree_paths <- traceTreePaths(tree_long, deduplicate = FALSE) - relationships_df <- parseRelationships(tree_long) + paresedrelationships <- parseRelationships(tree_long, tree_paths) # relationships_df <- processParents(tree_long, datasource = "wiki") @@ -65,7 +66,8 @@ readWikifamilytree <- function(text = NULL, verbose = FALSE, file_path = NULL, . summary = summary_text, members = members_df, structure = tree_long, - relationships = relationships_df + tree_paths = paresedrelationships$tree_paths, + relationships = paresedrelationships$relationships ) } @@ -155,16 +157,22 @@ parseTree <- function(tree_lines) { #' infer relationship from tree template #' #' @param tree_long A data frame containing the tree structure in long format. +#' @param tree_paths Optional. traceTreePaths output. If NULL, it will be calculated. #' @return A data frame containing the relationships between family members. #' @keywords internal #' -parseRelationships <- function(tree_long) { +parseRelationships <- function(tree_long, tree_paths=NULL) { -traced <- traceTreePaths(tree_long, deduplicate = FALSE) + # Check if tree_paths is NULL and call traceTreePaths if necessary + if (is.null(tree_paths)) { + tree_paths <- traceTreePaths(tree_long, deduplicate = FALSE) + } + # Initialize relationships dataframe: one row per unique person + person_ids <- unique(tree_long$id[!is.na(tree_long$id)]) # Initialize relationships data frame relationships <- data.frame( - id = tree_long$id[!is.na(tree_long$id)], + id = person_ids, momID = NA_character_, dadID = NA_character_, parent_1 = NA_character_, @@ -173,33 +181,88 @@ traced <- traceTreePaths(tree_long, deduplicate = FALSE) stringsAsFactors = FALSE ) - traced <- traced[!is.na(traced$from_id) & !is.na(traced$to_id), ] - + tree_paths <- tree_paths[!is.na(tree_paths$from_id) & !is.na(tree_paths$to_id), ] - # > traced -# from_id to_id path_length intermediates intermediate_values -# A B 2 1_2 + -# A C 5 1_2;2_2;3_2;4_2 +|y| -# B C 5 1_2;2_2;3_2;4_2 +|y| # Fill in relationships based on the tree structure - traced$relationship <- NA_character_ + tree_paths$relationship <- NA_character_ + + # map relationships based on the intermediate values + + tree_paths$relationship[ + grepl("\\+", tree_paths$intermediate_values) & + !grepl("y", tree_paths$intermediate_values) + ] <- "spouse" + + # Parent-child: + and y both present + tree_paths$relationship[ + grepl("\\+", tree_paths$intermediate_values) & + grepl("y", tree_paths$intermediate_values) + ] <- "offspring" +tree_paths$relationship[ + is.na(tree_paths$relationship) & grepl("y", tree_paths$intermediate_values) +] <- "offspring" - # if intermediate values is "+", the relationship is spouse - traced$relationship[traced$intermediate_values=="+"] <- "spouse" + # determine direction + tree_paths$relationship[grepl("^\\+", tree_paths$intermediate_values) & tree_paths$relationship=="offspring"] <- "parent-child" + tree_paths$relationship[grepl("[y\\|]$", tree_paths$intermediate_values) & tree_paths$relationship=="offspring"] <- "parent-child" + tree_paths$relationship[grepl("\\+$", tree_paths$intermediate_values) & tree_paths$relationship=="offspring"] <- "child-parent" + tree_paths$relationship[grepl("^[y\\|]", tree_paths$intermediate_values) & tree_paths$relationship=="offspring"] <- "child-parent" - #if intermediate value contains "y", the relationship is parent-child - traced$relationship[traced$intermediate_values=="+|y|"] <- "parent-child" - traced$relationship[traced$intermediate_values=="|y|+"] <- "child-parent" +# Fill spouse links + spouse_links <- tree_paths[tree_paths$relationship == "spouse", ] + for (i in seq_len(nrow(spouse_links))) { + a <- spouse_links$from_id[i] + b <- spouse_links$to_id[i] + relationships$spouseID[relationships$id == a] <- b + relationships$spouseID[relationships$id == b] <- a + } + + # Fill parent-child links from directional tags - return(relationships) + pc_links <- tree_paths[tree_paths$relationship == "parent-child", ] + for (i in seq_len(nrow(pc_links))) { + relationships <- assignParent(df=relationships, + child=pc_links$to_id[i], + parent=pc_links$from_id[i]) + } + + # --- Child-parent (to_id = parent) --- + cp_links <- tree_paths[tree_paths$relationship == "child-parent", ] + for (i in seq_len(nrow(cp_links))) { + parent <- cp_links$to_id[i] + child <- cp_links$from_id[i] + relationships <- assignParent(relationships, child, parent) + } + + out <- list(tree_paths = tree_paths, + relationships = relationships) + + return(out) } +#' Assign Parent +#' @param df A data frame containing the relationships. +#' @param child The ID of the child. +#' @param parent The ID of the parent. +#' @return A data frame with updated parent information. +#' @keywords internal +assignParent <- function(df, child, parent) { + idx <- which(df$id == child) + if (length(idx) != 1) return(df) + + if (is.na(df$parent_1[idx])) { + df$parent_1[idx] <- parent + } else if (is.na(df$parent_2[idx]) && df$parent_1[idx] != parent) { + df$parent_2[idx] <- parent + } + return(df) + } #' Trace paths between individuals in a family tree grid @@ -210,7 +273,7 @@ traced <- traceTreePaths(tree_long, deduplicate = FALSE) #' @export traceTreePaths <- function(tree_long, deduplicate = TRUE) { # Keep only relevant cells (people and path symbols) - path_symbols <- c("|", "-", "+", "v", "^", "y", ",", ".", "`", "!") + path_symbols <- c("|", "-", "+", "v", "^", "y", ",", ".", "`", "!", "~", "x", ")", "(") tree_long$Value <- gsub("\\s+", "", tree_long$Value) # Remove whitespace active_cells <- tree_long[!is.na(tree_long$Value) & (tree_long$Value %in% path_symbols | !is.na(tree_long$id)), ] @@ -244,6 +307,11 @@ traceTreePaths <- function(tree_long, deduplicate = TRUE) { from_key <- person_keys[i] to_key <- person_keys[j] + # skip if either endpoint is not in graph + if (!(from_key %in% igraph::V(g)$name) || !(to_key %in% igraph::V(g)$name)) { + next + } + # Find the shortest path between the two keys sp <- suppressWarnings(igraph::shortest_paths(g, from_key, to_key, output = "vpath")$vpath[[1]]) if (length(sp) > 1) { intermediate <- setdiff(names(sp), c(from_key, to_key)) @@ -305,7 +373,7 @@ findNeighbors <- function(cell,active_keys) { #' #' @param df A data frame with columns from_id and to_id #' @return A data frame with unique pairs of IDs -#' @export +#' @keywords internal deduplicatePairs <- function(df) { # Create a new column with sorted pairs df$pair <- apply(df[, c("from_id", "to_id")], 1, function(x) paste(sort(x), collapse = "_")) diff --git a/man/assignParent.Rd b/man/assignParent.Rd new file mode 100644 index 00000000..a4a58c44 --- /dev/null +++ b/man/assignParent.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/readWikifamilytree.R +\name{assignParent} +\alias{assignParent} +\title{Assign Parent} +\usage{ +assignParent(df, child, parent) +} +\arguments{ +\item{df}{A data frame containing the relationships.} + +\item{child}{The ID of the child.} + +\item{parent}{The ID of the parent.} +} +\value{ +A data frame with updated parent information. +} +\description{ +Assign Parent +} +\keyword{internal} diff --git a/man/deduplicatePairs.Rd b/man/deduplicatePairs.Rd index 4a042665..e133d803 100644 --- a/man/deduplicatePairs.Rd +++ b/man/deduplicatePairs.Rd @@ -15,3 +15,4 @@ A data frame with unique pairs of IDs \description{ Deduplicate pairs of IDs in a data frame } +\keyword{internal} diff --git a/man/parseRelationships.Rd b/man/parseRelationships.Rd index 24e864b5..5e017a3a 100644 --- a/man/parseRelationships.Rd +++ b/man/parseRelationships.Rd @@ -4,10 +4,12 @@ \alias{parseRelationships} \title{infer relationship from tree template} \usage{ -parseRelationships(tree_long) +parseRelationships(tree_long, tree_paths = NULL) } \arguments{ \item{tree_long}{A data frame containing the tree structure in long format.} + +\item{tree_paths}{Optional. traceTreePaths output. If NULL, it will be calculated.} } \value{ A data frame containing the relationships between family members. From ab1c7fd8da2d43b51f9e8e76815574a34154b00b Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Sat, 19 Apr 2025 14:16:08 -0400 Subject: [PATCH 04/11] rename functions --- .gitignore | 26 ++-- NAMESPACE | 4 +- R/readWikifamilytree.R | 141 +++++++++--------- man/{parseTree.Rd => buildTreeGrid.Rd} | 6 +- ...akeLongTree.Rd => convertGrid2LongTree.Rd} | 6 +- ...{matchMembers.Rd => extractMemberTable.Rd} | 6 +- man/{findNeighbors.Rd => getGridNeighbors.Rd} | 6 +- ...ctSummaryText.Rd => getWikiTreeSummary.Rd} | 6 +- ...tionships.Rd => parseTreeRelationships.Rd} | 6 +- man/{assignParent.Rd => populateParents.Rd} | 6 +- tests/testthat/test-readWikiTree.R | 46 +++--- 11 files changed, 128 insertions(+), 131 deletions(-) rename man/{parseTree.Rd => buildTreeGrid.Rd} (82%) rename man/{makeLongTree.Rd => convertGrid2LongTree.Rd} (78%) rename man/{matchMembers.Rd => extractMemberTable.Rd} (82%) rename man/{findNeighbors.Rd => getGridNeighbors.Rd} (80%) rename man/{extractSummaryText.Rd => getWikiTreeSummary.Rd} (81%) rename man/{parseRelationships.Rd => parseTreeRelationships.Rd} (80%) rename man/{assignParent.Rd => populateParents.Rd} (82%) diff --git a/.gitignore b/.gitignore index f84ea082..5f04620b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,19 +1,21 @@ -.Rproj.user +*.ASOIAF.ged +*.Rproj +*.knit.md .DS_Store -data-raw/ASOIAF.ged -R/.Rhistory +.Rdata .Rhistory -paper/paper.html +.Rproj.user +.httr-oauth +.quarto +.vscode/launch.json /Meta/ -*.knit.md -vignettes/articles/paper.html BGmisc.code-workspace -tests/testthat/Rplots.pdf -*.ASOIAF.ged -ASOIAF.ged -*.Rproj +R/.Rhistory benchmark_results.csv -.vscode/launch.json -dataRelatedPairs_new2.csv +data-raw/ASOIAF.ged data-raw/ASOIAF_040725.ged dataRelatedPairs.csv +dataRelatedPairs_new2.csv +paper/paper.html +tests/testthat/Rplots.pdf +vignettes/articles/paper.html diff --git a/NAMESPACE b/NAMESPACE index fe31174b..b5cff75a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ export(SimPed) export(assignCoupleIDs) +export(buildTreeGrid) export(calcAllGens) export(calcFamilySize) export(calculateRelatedness) @@ -14,14 +15,13 @@ export(comp2vech) export(createGenDataFrame) export(dropLink) export(evenInsert) -export(extractSummaryText) export(fitComponentModel) +export(getWikiTreeSummary) export(identifyComponentModel) export(inferRelatedness) export(insertEven) export(makeInbreeding) export(makeTwins) -export(parseTree) export(ped2add) export(ped2ce) export(ped2cn) diff --git a/R/readWikifamilytree.R b/R/readWikifamilytree.R index 5a663fb4..018caaef 100644 --- a/R/readWikifamilytree.R +++ b/R/readWikifamilytree.R @@ -28,23 +28,23 @@ readWikifamilytree <- function(text = NULL, verbose = FALSE, file_path = NULL, . } # Extract summary text - summary_text <- extractSummaryText(text) + summary_text <- getWikiTreeSummary(text) # Extract all lines defining the family tree tree_lines <- unlist(stringr::str_extract_all(text, "\\{\\{familytree.*?\\}\\}")) tree_lines <- tree_lines[!stringr::str_detect(tree_lines, "start|end")] # Remove start/end markers tree_lines <- gsub("\\{\\{familytree(.*?)\\}\\}", "\\1", tree_lines) # Remove wrapping markup # Convert tree structure into a coordinate grid (preserves symbols!) - tree_df <- parseTree(tree_lines) + tree_df <- buildTreeGrid(tree_lines) # Identify columns that start with "Y" cols_to_pivot <- grep("^Y", names(tree_df), value = TRUE) # Reshape from wide to long format - tree_long <- makeLongTree(tree_df, cols_to_pivot) + tree_long <- convertGrid2LongTree(tree_df, cols_to_pivot) # Extract member definitions - members_df <- matchMembers(text) + members_df <- extractMemberTable(text) members_df$id <- paste0("P", seq_len(nrow(members_df))) # Assign unique person IDs # Merge names into the tree structure (keeping all symbols!) @@ -55,19 +55,14 @@ readWikifamilytree <- function(text = NULL, verbose = FALSE, file_path = NULL, . # parse relationships and infer them tree_paths <- traceTreePaths(tree_long, deduplicate = FALSE) - paresedrelationships <- parseRelationships(tree_long, tree_paths) - - # relationships_df <- processParents(tree_long, datasource = "wiki") - - - + parsedRelationships <- parseTreeRelationships(tree_long, tree_paths) # Return structured table of the family tree (symbols included) list( summary = summary_text, members = members_df, structure = tree_long, - tree_paths = paresedrelationships$tree_paths, - relationships = paresedrelationships$relationships + tree_paths = parsedRelationships$tree_paths, + relationships = parsedRelationships$relationships ) } @@ -76,7 +71,7 @@ readWikifamilytree <- function(text = NULL, verbose = FALSE, file_path = NULL, . #' @param cols_to_pivot A character vector of column names to pivot. #' @return A long data frame containing the tree structure. #' @keywords internal -makeLongTree <- function(tree_df, cols_to_pivot) { +convertGrid2LongTree <- function(tree_df, cols_to_pivot) { tree_long <- stats::reshape(tree_df, varying = cols_to_pivot, v.names = "Value", @@ -97,7 +92,7 @@ makeLongTree <- function(tree_df, cols_to_pivot) { #' @return A data frame containing information about the members of the family tree. #' @keywords internal -matchMembers <- function(text) { +extractMemberTable <- function(text) { member_matches <- stringr::str_extract_all(text, "\\|\\s*([A-Za-z0-9]+)\\s*=\\s*([^|}]*)")[[1]] member_matches <- gsub("\\[|\\]|'''", "", member_matches) # Remove formatting @@ -122,7 +117,7 @@ matchMembers <- function(text) { #' @keywords internal #' @export -extractSummaryText <- function(text) { +getWikiTreeSummary <- function(text) { summary_match <- stringr::str_match(text, "\\{\\{familytree/start \\|summary=(.*?)\\}\\}") summary_text <- ifelse(!is.na(summary_match[, 2]), summary_match[, 2], NA) return(summary_text) @@ -134,7 +129,7 @@ extractSummaryText <- function(text) { #' @keywords internal #' @export -parseTree <- function(tree_lines) { +buildTreeGrid <- function(tree_lines) { tree_matrix <- base::strsplit(tree_lines, "\\|") # Split each row into columns max_cols <- max(sapply(tree_matrix, length)) # Find the max column count @@ -150,10 +145,6 @@ parseTree <- function(tree_lines) { return(tree_df) } - - - - #' infer relationship from tree template #' #' @param tree_long A data frame containing the tree structure in long format. @@ -161,8 +152,7 @@ parseTree <- function(tree_lines) { #' @return A data frame containing the relationships between family members. #' @keywords internal #' -parseRelationships <- function(tree_long, tree_paths=NULL) { - +parseTreeRelationships <- function(tree_long, tree_paths = NULL) { # Check if tree_paths is NULL and call traceTreePaths if necessary if (is.null(tree_paths)) { tree_paths <- traceTreePaths(tree_long, deduplicate = FALSE) @@ -194,26 +184,22 @@ parseRelationships <- function(tree_long, tree_paths=NULL) { ] <- "spouse" # Parent-child: + and y both present - tree_paths$relationship[ + tree_paths$relationship[ grepl("\\+", tree_paths$intermediate_values) & - grepl("y", tree_paths$intermediate_values) + grepl("y", tree_paths$intermediate_values) ] <- "offspring" -tree_paths$relationship[ - is.na(tree_paths$relationship) & grepl("y", tree_paths$intermediate_values) -] <- "offspring" - - - - # determine direction - tree_paths$relationship[grepl("^\\+", tree_paths$intermediate_values) & tree_paths$relationship=="offspring"] <- "parent-child" - tree_paths$relationship[grepl("[y\\|]$", tree_paths$intermediate_values) & tree_paths$relationship=="offspring"] <- "parent-child" - tree_paths$relationship[grepl("\\+$", tree_paths$intermediate_values) & tree_paths$relationship=="offspring"] <- "child-parent" - tree_paths$relationship[grepl("^[y\\|]", tree_paths$intermediate_values) & tree_paths$relationship=="offspring"] <- "child-parent" - + tree_paths$relationship[ + is.na(tree_paths$relationship) & grepl("y", tree_paths$intermediate_values) + ] <- "offspring" + # determine direction + tree_paths$relationship[grepl("^\\+", tree_paths$intermediate_values) & tree_paths$relationship == "offspring"] <- "parent-child" + tree_paths$relationship[grepl("[y\\|]$", tree_paths$intermediate_values) & tree_paths$relationship == "offspring"] <- "parent-child" + tree_paths$relationship[grepl("\\+$", tree_paths$intermediate_values) & tree_paths$relationship == "offspring"] <- "child-parent" + tree_paths$relationship[grepl("^[y\\|]", tree_paths$intermediate_values) & tree_paths$relationship == "offspring"] <- "child-parent" -# Fill spouse links + # Fill spouse links spouse_links <- tree_paths[tree_paths$relationship == "spouse", ] for (i in seq_len(nrow(spouse_links))) { @@ -227,22 +213,27 @@ tree_paths$relationship[ pc_links <- tree_paths[tree_paths$relationship == "parent-child", ] for (i in seq_len(nrow(pc_links))) { - relationships <- assignParent(df=relationships, - child=pc_links$to_id[i], - parent=pc_links$from_id[i]) + relationships <- populateParents( + df = relationships, + child = pc_links$to_id[i], + parent = pc_links$from_id[i] + ) } # --- Child-parent (to_id = parent) --- cp_links <- tree_paths[tree_paths$relationship == "child-parent", ] for (i in seq_len(nrow(cp_links))) { - parent <- cp_links$to_id[i] - child <- cp_links$from_id[i] - relationships <- assignParent(relationships, child, parent) + relationships <- populateParents( + df = relationships, + child = cp_links$from_id[i], + parent = cp_links$to_id[i] + ) } - out <- list(tree_paths = tree_paths, - relationships = relationships) - + out <- list( + tree_paths = tree_paths, + relationships = relationships + ) return(out) } @@ -252,18 +243,20 @@ tree_paths$relationship[ #' @param parent The ID of the parent. #' @return A data frame with updated parent information. #' @keywords internal -assignParent <- function(df, child, parent) { - idx <- which(df$id == child) - if (length(idx) != 1) return(df) - - if (is.na(df$parent_1[idx])) { - df$parent_1[idx] <- parent - } else if (is.na(df$parent_2[idx]) && df$parent_1[idx] != parent) { - df$parent_2[idx] <- parent - } +populateParents <- function(df, child, parent) { + idx <- which(df$id == child) + if (length(idx) != 1) { return(df) } + if (is.na(df$parent_1[idx])) { + df$parent_1[idx] <- parent + } else if (is.na(df$parent_2[idx]) && df$parent_1[idx] != parent) { + df$parent_2[idx] <- parent + } + return(df) +} + #' Trace paths between individuals in a family tree grid #' @@ -271,20 +264,21 @@ assignParent <- function(df, child, parent) { #' @param deduplicate Logical, if TRUE, will remove duplicate paths #' @return A data.frame with columns: from_id, to_id, direction, path_length, intermediates #' @export +#' traceTreePaths <- function(tree_long, deduplicate = TRUE) { # Keep only relevant cells (people and path symbols) path_symbols <- c("|", "-", "+", "v", "^", "y", ",", ".", "`", "!", "~", "x", ")", "(") tree_long$Value <- gsub("\\s+", "", tree_long$Value) # Remove whitespace active_cells <- tree_long[!is.na(tree_long$Value) & - (tree_long$Value %in% path_symbols | !is.na(tree_long$id)), ] + (tree_long$Value %in% path_symbols | !is.na(tree_long$id)), ] active_cells$key <- paste(active_cells$Row, active_cells$Column, sep = "_") - edges <- do.call(rbind, lapply(seq_len(nrow(active_cells)), function(i) { from_key <- active_cells$key[i] - to_keys <- findNeighbors(active_cells[i, ], - active_keys=active_cells$key) + to_keys <- getGridNeighbors(active_cells[i, ], + active_keys = active_cells$key + ) if (length(to_keys) > 0) { data.frame(from = from_key, to = to_keys, stringsAsFactors = FALSE) } @@ -327,21 +321,22 @@ traceTreePaths <- function(tree_long, deduplicate = TRUE) { intermediates = paste(intermediate, collapse = ";"), intermediate_values = paste(intermediate_values, collapse = ""), stringsAsFactors = FALSE - ))} else { - # If no path found, add a row with NA values - result <- rbind(result, data.frame( - from_id = id_map[[from_key]], - to_id = id_map[[to_key]], - path_length = NA, - intermediates = NA, - intermediate_values = NA, - stringsAsFactors = FALSE - )) - } + )) + } else { + # If no path found, add a row with NA values + result <- rbind(result, data.frame( + from_id = id_map[[from_key]], + to_id = id_map[[to_key]], + path_length = NA, + intermediates = NA, + intermediate_values = NA, + stringsAsFactors = FALSE + )) } } + } - if(deduplicate==TRUE){ + if (deduplicate == TRUE) { # Deduplicate pairs result <- deduplicatePairs(result) } @@ -355,8 +350,8 @@ traceTreePaths <- function(tree_long, deduplicate = TRUE) { #' @return A character vector of neighboring cell keys #' @keywords internal -findNeighbors <- function(cell,active_keys) { - offsets <- list(c(1, 0), c(-1, 0), c(0, 1), c(0, -1)) # down, up, right, left +getGridNeighbors <- function(cell, active_keys) { + offsets <- list(c(1, 0), c(-1, 0), c(0, 1), c(0, -1)) # down, up, right, left out <- character() for (offset in offsets) { r2 <- cell$Row + offset[1] diff --git a/man/parseTree.Rd b/man/buildTreeGrid.Rd similarity index 82% rename from man/parseTree.Rd rename to man/buildTreeGrid.Rd index 6982013e..13205cee 100644 --- a/man/parseTree.Rd +++ b/man/buildTreeGrid.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/readWikifamilytree.R -\name{parseTree} -\alias{parseTree} +\name{buildTreeGrid} +\alias{buildTreeGrid} \title{Parse Tree} \usage{ -parseTree(tree_lines) +buildTreeGrid(tree_lines) } \arguments{ \item{tree_lines}{A character vector containing the lines of the tree structure.} diff --git a/man/makeLongTree.Rd b/man/convertGrid2LongTree.Rd similarity index 78% rename from man/makeLongTree.Rd rename to man/convertGrid2LongTree.Rd index 96d4a514..d3bd5b5f 100644 --- a/man/makeLongTree.Rd +++ b/man/convertGrid2LongTree.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/readWikifamilytree.R -\name{makeLongTree} -\alias{makeLongTree} +\name{convertGrid2LongTree} +\alias{convertGrid2LongTree} \title{Make Long Tree} \usage{ -makeLongTree(tree_df, cols_to_pivot) +convertGrid2LongTree(tree_df, cols_to_pivot) } \arguments{ \item{tree_df}{A data frame containing the tree structure.} diff --git a/man/matchMembers.Rd b/man/extractMemberTable.Rd similarity index 82% rename from man/matchMembers.Rd rename to man/extractMemberTable.Rd index 382c05ec..72b0945b 100644 --- a/man/matchMembers.Rd +++ b/man/extractMemberTable.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/readWikifamilytree.R -\name{matchMembers} -\alias{matchMembers} +\name{extractMemberTable} +\alias{extractMemberTable} \title{Match Members} \usage{ -matchMembers(text) +extractMemberTable(text) } \arguments{ \item{text}{A character string containing the text of a family tree in wiki format.} diff --git a/man/findNeighbors.Rd b/man/getGridNeighbors.Rd similarity index 80% rename from man/findNeighbors.Rd rename to man/getGridNeighbors.Rd index 58de5a20..cc1e20fa 100644 --- a/man/findNeighbors.Rd +++ b/man/getGridNeighbors.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/readWikifamilytree.R -\name{findNeighbors} -\alias{findNeighbors} +\name{getGridNeighbors} +\alias{getGridNeighbors} \title{Build adjacency list (4-way neighbors)} \usage{ -findNeighbors(cell, active_keys) +getGridNeighbors(cell, active_keys) } \arguments{ \item{cell}{A data frame with columns Row and Column} diff --git a/man/extractSummaryText.Rd b/man/getWikiTreeSummary.Rd similarity index 81% rename from man/extractSummaryText.Rd rename to man/getWikiTreeSummary.Rd index 9b12be26..dadc4c33 100644 --- a/man/extractSummaryText.Rd +++ b/man/getWikiTreeSummary.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/readWikifamilytree.R -\name{extractSummaryText} -\alias{extractSummaryText} +\name{getWikiTreeSummary} +\alias{getWikiTreeSummary} \title{Extract Summary Text} \usage{ -extractSummaryText(text) +getWikiTreeSummary(text) } \arguments{ \item{text}{A character string containing the text of a family tree in wiki format.} diff --git a/man/parseRelationships.Rd b/man/parseTreeRelationships.Rd similarity index 80% rename from man/parseRelationships.Rd rename to man/parseTreeRelationships.Rd index 5e017a3a..a29a56da 100644 --- a/man/parseRelationships.Rd +++ b/man/parseTreeRelationships.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/readWikifamilytree.R -\name{parseRelationships} -\alias{parseRelationships} +\name{parseTreeRelationships} +\alias{parseTreeRelationships} \title{infer relationship from tree template} \usage{ -parseRelationships(tree_long, tree_paths = NULL) +parseTreeRelationships(tree_long, tree_paths = NULL) } \arguments{ \item{tree_long}{A data frame containing the tree structure in long format.} diff --git a/man/assignParent.Rd b/man/populateParents.Rd similarity index 82% rename from man/assignParent.Rd rename to man/populateParents.Rd index a4a58c44..f0871d6a 100644 --- a/man/assignParent.Rd +++ b/man/populateParents.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/readWikifamilytree.R -\name{assignParent} -\alias{assignParent} +\name{populateParents} +\alias{populateParents} \title{Assign Parent} \usage{ -assignParent(df, child, parent) +populateParents(df, child, parent) } \arguments{ \item{df}{A data frame containing the relationships.} diff --git a/tests/testthat/test-readWikiTree.R b/tests/testthat/test-readWikiTree.R index 03067c2c..3ab920bd 100644 --- a/tests/testthat/test-readWikiTree.R +++ b/tests/testthat/test-readWikiTree.R @@ -7,17 +7,17 @@ test_that("traceTreePaths works correctly for horizontal tree", { # with Row, Column, Value, and id columns # The id column is used to identify the nodes in the tree -tree_horizontal <- data.frame( - Row = rep(1, 3), - Column = 1:3, - Value = c("A", "+", "B"), - stringsAsFactors = FALSE -) -tree_horizontal$id <- NA -tree_horizontal$id[tree_horizontal$Value %in% c("A", "B")] <- tree_horizontal$Value[tree_horizontal$Value %in% c("A", "B")] - -result <- traceTreePaths(tree_horizontal) -# Check the result + tree_horizontal <- data.frame( + Row = rep(1, 3), + Column = 1:3, + Value = c("A", "+", "B"), + stringsAsFactors = FALSE + ) + tree_horizontal$id <- NA + tree_horizontal$id[tree_horizontal$Value %in% c("A", "B")] <- tree_horizontal$Value[tree_horizontal$Value %in% c("A", "B")] + + result <- traceTreePaths(tree_horizontal) + # Check the result expect_equal(result$path[1], "A") expect_equal(result$path[2], "B") expect_equal(result$path[3], NA) @@ -29,18 +29,18 @@ test_that("traceTreePaths works correctly for vertical tree", { # This is a simplified version of the tree_vertical data frame # with Row, Column, Value, and id columns # The id column is used to identify the nodes in the tree -tree_spouse_child <- data.frame( - Row = c(1, 1, 1, 2, 3, 4, 5), - Column = c(1, 2, 3, 2, 2, 2, 2), - Value = c("A", "+", "B", "|", "y", "|", "C"), - stringsAsFactors = FALSE -) -tree_spouse_child$id <- NA -tree_spouse_child$id[tree_spouse_child$Value %in% c("A", "B", "C")] <- tree_spouse_child$Value[tree_spouse_child$Value %in% c("A", "B", "C")] - -result <- traceTreePaths(tree_spouse_child) - -expect_equal(result$path[1], "A") + tree_spouse_child <- data.frame( + Row = c(1, 1, 1, 2, 3, 4, 5), + Column = c(1, 2, 3, 2, 2, 2, 2), + Value = c("A", "+", "B", "|", "y", "|", "C"), + stringsAsFactors = FALSE + ) + tree_spouse_child$id <- NA + tree_spouse_child$id[tree_spouse_child$Value %in% c("A", "B", "C")] <- tree_spouse_child$Value[tree_spouse_child$Value %in% c("A", "B", "C")] + + result <- traceTreePaths(tree_spouse_child) + + expect_equal(result$path[1], "A") expect_equal(result$path[2], "B") expect_equal(result$path[3], "C") expect_equal(result$path[4], "y") From 3373800487629a67f7cf60ccea36be197c94dc8d Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Sat, 19 Apr 2025 14:25:52 -0400 Subject: [PATCH 05/11] Update test-readWikiTree.R tests now work --- tests/testthat/test-readWikiTree.R | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tests/testthat/test-readWikiTree.R b/tests/testthat/test-readWikiTree.R index 3ab920bd..df894c08 100644 --- a/tests/testthat/test-readWikiTree.R +++ b/tests/testthat/test-readWikiTree.R @@ -18,9 +18,11 @@ test_that("traceTreePaths works correctly for horizontal tree", { result <- traceTreePaths(tree_horizontal) # Check the result - expect_equal(result$path[1], "A") - expect_equal(result$path[2], "B") - expect_equal(result$path[3], NA) + # Check the result + expect_equal(names(result), c("from_id", "to_id", "path_length", "intermediates","intermediate_values")) + expect_equal(c("A", "B") %in% c(result$from_id, result$to_id), rep(TRUE,2)) + expect_equal(result$path_length[result$from_id == "A" & result$to_id == "B"], 2) + expect_equal(result$intermediate_values[result$from_id == "A" & result$to_id == "B"], "+") }) @@ -39,12 +41,14 @@ test_that("traceTreePaths works correctly for vertical tree", { tree_spouse_child$id[tree_spouse_child$Value %in% c("A", "B", "C")] <- tree_spouse_child$Value[tree_spouse_child$Value %in% c("A", "B", "C")] result <- traceTreePaths(tree_spouse_child) + # Check the result + expect_equal(names(result), c("from_id", "to_id", "path_length", "intermediates","intermediate_values")) + expect_equal(c("A", "B", "C") %in% c(result$from_id, result$to_id), rep(TRUE,3)) + expect_equal(result$path_length[result$from_id == "A" & result$to_id == "B"], 2) + expect_equal(result$path_length[result$from_id == "A" & result$to_id == "C"], 5) + expect_equal(result$path_length[result$from_id == "B" & result$to_id == "C"], 5) + expect_equal(result$intermediate_values[result$from_id == "A" & result$to_id == "B"], "+") - expect_equal(result$path[1], "A") - expect_equal(result$path[2], "B") - expect_equal(result$path[3], "C") - expect_equal(result$path[4], "y") - expect_equal(result$path[5], NA) }) @@ -79,8 +83,7 @@ test_that("readWikifamilytree reads a string correctly", { expect_equal( result2$summary, - "I have a brother Joe and a little sister: my mom married my dad, and my dad's parents were Grandma and Grandpa; they had another child, Aunt Daisy." - ) + result$summary) }) From 00ac669376d293449a3e6f199120860b0f339e06 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Mon, 21 Apr 2025 13:52:26 -0400 Subject: [PATCH 06/11] rename --- .../{modelingrelatedness.Rmd => modelingvariancecomponents.Rmd} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename vignettes/{modelingrelatedness.Rmd => modelingvariancecomponents.Rmd} (98%) diff --git a/vignettes/modelingrelatedness.Rmd b/vignettes/modelingvariancecomponents.Rmd similarity index 98% rename from vignettes/modelingrelatedness.Rmd rename to vignettes/modelingvariancecomponents.Rmd index 6f793690..c82441a9 100644 --- a/vignettes/modelingrelatedness.Rmd +++ b/vignettes/modelingvariancecomponents.Rmd @@ -2,7 +2,7 @@ title: "Modeling variance components" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{modelingandrelatedness} + %\VignetteIndexEntry{modelingvariancecomponents} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- From 6c3603b1588fddac0c1a45c052ad8beca16a7309 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Mon, 21 Apr 2025 16:31:31 -0400 Subject: [PATCH 07/11] rename --- R/{convertPedigree.R => buildComponent.R} | 0 R/{buildPedigree.R => segmentPedigree.R} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename R/{convertPedigree.R => buildComponent.R} (100%) rename R/{buildPedigree.R => segmentPedigree.R} (100%) diff --git a/R/convertPedigree.R b/R/buildComponent.R similarity index 100% rename from R/convertPedigree.R rename to R/buildComponent.R diff --git a/R/buildPedigree.R b/R/segmentPedigree.R similarity index 100% rename from R/buildPedigree.R rename to R/segmentPedigree.R From 2038a66928adc115310e6f618f0d0907c5d3387a Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Mon, 21 Apr 2025 16:39:00 -0400 Subject: [PATCH 08/11] split out adjacency --- R/buildComponent.R | 528 ---------------------------------------- R/constructAdjacency.R | 529 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 529 insertions(+), 528 deletions(-) create mode 100644 R/constructAdjacency.R diff --git a/R/buildComponent.R b/R/buildComponent.R index 86e9970a..dda09f1f 100644 --- a/R/buildComponent.R +++ b/R/buildComponent.R @@ -482,531 +482,3 @@ ped2ce <- function(ped, } } -.adjLoop <- function(ped, component, saveable, resume, - save_path, verbose, lastComputed, - nr, checkpoint_files, update_rate, - parList, lens, save_rate_parlist, - ...) { - # Loop through each individual in the pedigree - # Build the adjacency matrix for parent-child relationships - # Is person in column j the parent of the person in row i? .5 for yes, 0 for no. - ped$momID <- as.numeric(ped$momID) - ped$dadID <- as.numeric(ped$dadID) - ped$ID <- as.numeric(ped$ID) - - for (i in (lastComputed + 1):nr) { - x <- ped[i, , drop = FALSE] - # Handle parentage according to the 'component' specified - if (component %in% c("generation", "additive")) { - # Code for 'generation' and 'additive' components - # Checks if is mom of ID or is dad of ID - xID <- as.numeric(x["ID"]) - sMom <- (xID == ped$momID) - sDad <- (xID == ped$dadID) - val <- sMom | sDad - val[is.na(val)] <- FALSE - } else if (component %in% c("common nuclear")) { - # Code for 'common nuclear' component - # IDs have the Same mom and Same dad - sMom <- (as.numeric(x["momID"]) == ped$momID) - sMom[is.na(sMom)] <- FALSE - sDad <- (as.numeric(x["dadID"]) == ped$dadID) - sDad[is.na(sDad)] <- FALSE - val <- sMom & sDad - } else if (component %in% c("mitochondrial")) { - # Code for 'mitochondrial' component - val <- (as.numeric(x["ID"]) == ped$momID) - val[is.na(val)] <- FALSE - } else { - stop("Unknown relatedness component requested") - } - # Storing the indices of the parent-child relationships - # Keep track of indices only, and then initialize a single sparse matrix - wv <- which(val) - parList[[i]] <- wv - lens[i] <- length(wv) - # Print progress if verbose is TRUE - if (verbose && (i %% update_rate == 0)) { - cat(paste0("Done with ", i, " of ", nr, "\n")) - } - # Checkpointing every save_rate iterations - if (saveable && (i %% save_rate_parlist == 0)) { - saveRDS(parList, file = checkpoint_files$parList) - saveRDS(lens, file = checkpoint_files$lens) - if (verbose) cat("Checkpointed parlist saved at iteration", i, "\n") - } - } - jss <- rep(1L:nr, times = lens) - iss <- unlist(parList) - list_of_adjacency <- list(iss = iss, jss = jss) - return(list_of_adjacency) -} - -.adjIndexed <- function(ped, component, saveable, resume, - save_path, verbose, lastComputed, - nr, checkpoint_files, update_rate, - parList, lens, save_rate_parlist) { - # Loop through each individual in the pedigree - # Build the adjacency matrix for parent-child relationships - # Is person in column j the parent of the person in row i? .5 for yes, 0 for no. - - # Convert IDs - ped$ID <- as.numeric(ped$ID) - ped$momID <- as.numeric(ped$momID) - ped$dadID <- as.numeric(ped$dadID) - - # parent-child lookup - mom_index <- match(ped$momID, ped$ID, nomatch = 0) - dad_index <- match(ped$dadID, ped$ID, nomatch = 0) - - for (i in (lastComputed + 1):nr) { - if (component %in% c("generation", "additive")) { - sMom <- (mom_index == i) - sDad <- (dad_index == i) - val <- sMom | sDad - } else if (component %in% c("common nuclear")) { - # Code for 'common nuclear' component - # IDs have the Same mom and Same dad - sMom <- (ped$momID[i] == ped$momID) - sMom[is.na(sMom)] <- FALSE - sDad <- (ped$dadID[i] == ped$dadID) - sDad[is.na(sDad)] <- FALSE - val <- sMom & sDad - } else if (component %in% c("mitochondrial")) { - val <- (mom_index == i) - } else { - stop("Unknown relatedness component requested") - } - - val[is.na(val)] <- FALSE - parList[[i]] <- which(val) - lens[i] <- length(parList[[i]]) - - # Print progress if verbose is TRUE - if (verbose && (i %% update_rate == 0)) { - cat(paste0("Done with ", i, " of ", nr, "\n")) - } - - # Checkpointing every save_rate iterations - if (saveable && (i %% save_rate_parlist == 0)) { - saveRDS(parList, file = checkpoint_files$parList) - saveRDS(lens, file = checkpoint_files$lens) - if (verbose) cat("Checkpointed parlist saved at iteration", i, "\n") - } - } - jss <- rep(1L:nr, times = lens) - iss <- unlist(parList) - list_of_adjacency <- list(iss = iss, jss = jss) - return(list_of_adjacency) -} - -.adjDirect <- function(ped, component, saveable, resume, - save_path, verbose, lastComputed, - nr, checkpoint_files, update_rate, - parList, lens, save_rate_parlist, adjBeta_method, - ...) { - # Loop through each individual in the pedigree - # Build the adjacency matrix for parent-child relationships - # Is person in column j the parent of the person in row i? .5 for yes, 0 for no. - uniID <- ped$ID # live dangerously without sort(unique(ped$ID)) - ped$ID <- as.numeric(factor(ped$ID, levels = uniID)) - ped$momID <- as.numeric(factor(ped$momID, levels = uniID)) - ped$dadID <- as.numeric(factor(ped$dadID, levels = uniID)) - - if (component %in% c("generation", "additive")) { - mIDs <- stats::na.omit(data.frame(rID = ped$ID, cID = ped$momID)) - dIDs <- stats::na.omit(data.frame(rID = ped$ID, cID = ped$dadID)) - iss <- c(mIDs$rID, dIDs$rID) - jss <- c(mIDs$cID, dIDs$cID) - } else if (component %in% c("common nuclear")) { - # message("Common Nuclear component is not yet implemented for direct method. Using index method.\n") - - # 1) Create a logical mask for only known parents - mask <- !is.na(ped$momID) & !is.na(ped$dadID) - - # 2) Create a single hash label for each known (momID, dadID) pair - base <- max(ped$ID, na.rm = TRUE) + 1L - pairCode <- ped$momID[mask] + base * ped$dadID[mask] - - # 3) Factor that label => each row with the same (mom,dad) gets the same integer code - childVec <- which(mask) - - # 4) Group children by pairCode, so each group is "all children with that (mom,dad)" - groupList <- split(childVec, pairCode) - - # 5) For each group with >1 children, form pairwise adjacency (i->j) for i != j - iss_list <- vector("list", length(groupList)) - jss_list <- vector("list", length(groupList)) - counter <- 1 - - for (g in groupList) { - k <- length(g) - if (k > 1) { - # We'll form all k^2 combos, then remove the diagonal i=j - # rep() calls faster than expand.grid - - # v = each child repeated k times - # w = entire group repeated once for each child - v <- rep(g, each = k) # row index - w <- rep(g, times = k) # col index - - keep <- (v != w) # remove diagonal where v == w - iss_list[[counter]] <- v[keep] - jss_list[[counter]] <- w[keep] - counter <- counter + 1 - } - } - - - iss <- unlist(iss_list, use.names = FALSE) - jss <- unlist(jss_list, use.names = FALSE) - - # list_of_adjacency <- .adjBeta(ped=ped,adjBeta_method=adjBeta_method, - # component = component, - # saveable = saveable, resume = resume, - # save_path = save_path, verbose = verbose, - # lastComputed = lastComputed, nr = nr, - # checkpoint_files = checkpoint_files, - # update_rate = update_rate, - # parList = parList, - # lens = lens, save_rate_parlist = save_rate_parlist, - # ...) - - # return(list_of_adjacency) - } else if (component %in% c("mitochondrial")) { - mIDs <- stats::na.omit(data.frame(rID = ped$ID, cID = ped$momID)) - iss <- c(mIDs$rID) - jss <- c(mIDs$cID) - } else { - stop("Unknown relatedness component requested") - } - list_of_adjacency <- list( - iss = iss, - jss = jss - ) - return(list_of_adjacency) -} - -#' Compute Parent Adjacency Matrix with Multiple Approaches -#' @inheritParams ped2com -#' @inherit ped2com details -#' @param nr the number of rows in the pedigree dataset -#' @param lastComputed the last computed index -#' @param parList a list of parent-child relationships -#' @param lens a vector of the lengths of the parent-child relationships -#' @param checkpoint_files a list of checkpoint files - -compute_parent_adjacency <- function(ped, component, - adjacency_method = "direct", - saveable, resume, - save_path, verbose = FALSE, - lastComputed = 0, nr, checkpoint_files, update_rate, - parList, lens, save_rate_parlist, adjBeta_method = NULL, - ...) { - if (adjacency_method == "loop") { - if (lastComputed < nr) { # Original version - list_of_adjacency <- .adjLoop( - ped = ped, - component = component, - saveable = saveable, - resume = resume, - save_path = save_path, - verbose = verbose, - lastComputed = lastComputed, - nr = nr, - checkpoint_files = checkpoint_files, - update_rate = update_rate, - parList = parList, - lens = lens, - save_rate_parlist = save_rate_parlist, - ... - ) - } - } else if (adjacency_method == "indexed") { # Garrison version - if (lastComputed < nr) { - list_of_adjacency <- .adjIndexed( - ped = ped, - component = component, - saveable = saveable, - resume = resume, - save_path = save_path, - verbose = verbose, - lastComputed = lastComputed, - nr = nr, - checkpoint_files = checkpoint_files, - update_rate = update_rate, - parList = parList, - lens = lens, - save_rate_parlist = save_rate_parlist, - ... - ) - } - } else if (adjacency_method == "direct") { # Hunter version - if (lastComputed < nr) { - list_of_adjacency <- .adjDirect( - ped = ped, - component = component, - saveable = saveable, - resume = resume, - save_path = save_path, - verbose = verbose, - lastComputed = lastComputed, - nr = nr, - checkpoint_files = checkpoint_files, - update_rate = update_rate, - parList = parList, - lens = lens, - save_rate_parlist = save_rate_parlist, - ... - ) - } - } else if (adjacency_method == "beta") { - list_of_adjacency <- .adjBeta( - ped = ped, - adjBeta_method = adjBeta_method, - component = component, - saveable = saveable, - resume = resume, - save_path = save_path, - verbose = verbose, - lastComputed = lastComputed, - nr = nr, - checkpoint_files = checkpoint_files, - update_rate = update_rate, - parList = parList, - lens = lens, - save_rate_parlist = save_rate_parlist, - ... - ) - } else { - stop("Invalid method specified. Choose from 'loop', 'direct', 'indexed', or beta") - } - if (saveable) { - saveRDS(parList, file = checkpoint_files$parList) - saveRDS(lens, file = checkpoint_files$lens) - if (verbose) { - cat("Final checkpoint saved for adjacency matrix.\n") - } - } - return(list_of_adjacency) -} - - -#' Determine isChild Status, isChild is the 'S' matrix from RAM -#' @param isChild_method method to determine isChild status -#' @param ped pedigree data frame -#' @return isChild 'S' matrix -#' - -isChild <- function(isChild_method, ped) { - if (isChild_method == "partialparent") { - isChild <- apply(ped[, c("momID", "dadID")], 1, function(x) { - .5 + .25 * sum(is.na(x)) # 2 parents -> .5, 1 parent -> .75, 0 parents -> 1 - }) - } else { - isChild <- apply(ped[, c("momID", "dadID")], 1, function(x) { - 2^(-!all(is.na(x))) - }) - } -} - - -.adjBeta <- function(ped, component, - adjBeta_method = 5, - parList = NULL, - lastComputed = 0, - nr = NULL, - lens = NULL, - saveable = FALSE, - resume = FALSE, - save_path = NULL, - verbose = FALSE, - save_rate_parlist = NULL, - update_rate = NULL, - checkpoint_files = NULL, - ...) { # 1) Pairwise compare mother IDs - if (adjBeta_method == 1) { - # gets slow when data are bigger. much slower than indexed - momMatch <- outer(ped$momID, ped$momID, FUN = "==") - momMatch[is.na(momMatch)] <- FALSE - - # 2) Pairwise compare father IDs - dadMatch <- outer(ped$dadID, ped$dadID, FUN = "==") - dadMatch[is.na(dadMatch)] <- FALSE - - # 3) Sibling adjacency if both mom & dad match - adj <- momMatch & dadMatch - - # 4) Extract indices where adj[i,j] is TRUE - w <- which(adj, arr.ind = TRUE) - # iss <- w[, 1] - # jss <- w[, 2] - # - list_of_adjacency <- list( - iss = w[, 1], - jss = w[, 2] - ) - } else if (adjBeta_method == 2) { - # 1) Create a logical mask for known parents - mask <- !is.na(ped$momID) & !is.na(ped$dadID) - - # 2) Create a single string label for each known (momID, dadID) pair - pairLabel <- paste0(ped$momID[mask], "_", ped$dadID[mask]) - - # 3) Factor that label => each row with the same (mom,dad) gets the same integer code - # This is "creating a new ID" for each unique parent pair - pairCode <- match(pairLabel, unique(pairLabel)) - - # childVec are the row indices in 'ped' that have known parents - childVec <- which(mask) # length(childVec) = sum(mask) - - # 4) Group children by pairCode, so each group is "all children with that (mom,dad)" - groupList <- split(childVec, pairCode) - - # 5) For each group with >1 children, form pairwise adjacency i->j - iss_list <- list() - jss_list <- list() - counter <- 1 - - for (g in groupList) { - if (length(g) > 1) { - combos <- expand.grid(g, g, KEEP.OUT.ATTRS = FALSE) - combos <- combos[combos[, 1] != combos[, 2], , drop = FALSE] - iss_list[[counter]] <- combos[, 1] - jss_list[[counter]] <- combos[, 2] - counter <- counter + 1 - } - } - # iss <- unlist(iss_list, use.names = FALSE) - # jss <- unlist(jss_list, use.names = FALSE) - - list_of_adjacency <- list( - iss = unlist(iss_list, use.names = FALSE), - jss = unlist(jss_list, use.names = FALSE) - ) - } else if (adjBeta_method == 3) { - nr <- nrow(ped) - # terrible - # Define a scalar-checking function: - f_check <- function(i, j) { - # i, j are each single integers - # Return one boolean: do they share both parents? - !is.na(ped$momID[i]) && !is.na(ped$dadID[i]) && - !is.na(ped$momID[j]) && !is.na(ped$dadID[j]) && - (ped$momID[i] == ped$momID[j]) && - (ped$dadID[i] == ped$dadID[j]) - } - - # Vectorize it so outer() will produce an nr x nr matrix - vf_check <- Vectorize(f_check) - - # Now outer() calls vf_check(...) in a way that yields scalar results - adj <- outer(seq_len(nr), seq_len(nr), FUN = vf_check) - - # Extract which cells of adj are TRUE - w <- which(adj, arr.ind = TRUE) - # iss <- w[, 1] - # jss <- w[, 2] - - list_of_adjacency <- list( - iss = iss <- w[, 1], - jss = jss <- w[, 2] - ) - } else if (adjBeta_method == 4) { - # 1) Create a logical mask for known parents - mask <- !is.na(ped$momID) & !is.na(ped$dadID) - - # 2) Create a single string label for each known (momID, dadID) pair - pairLabel <- paste0(ped$momID[mask], "_", ped$dadID[mask]) - - # 3) Factor that label => each row with the same (mom,dad) gets the same integer code - pairCode <- match(pairLabel, unique(pairLabel)) - - # childVec are the row indices in 'ped' that have known parents - childVec <- which(mask) - - # 4) Group children by pairCode, so each group is "all children with that (mom,dad)" - groupList <- split(childVec, pairCode) - - # 5) For each group with >1 children, form pairwise adjacency (i->j) for i != j - iss_list <- vector("list", length(groupList)) - jss_list <- vector("list", length(groupList)) - counter <- 1 - - for (g in groupList) { - k <- length(g) - if (k > 1) { - # We'll form all k^2 combos, then remove the diagonal i=j - # Instead of expand.grid, do rep() calls: - - # v = each child repeated k times - # w = entire group repeated once for each child - v <- rep(g, each = k) # row index - w <- rep(g, times = k) # col index - - keep <- (v != w) # remove diagonal where v == w - iss_list[[counter]] <- v[keep] - jss_list[[counter]] <- w[keep] - counter <- counter + 1 - } - } - - list_of_adjacency <- list( - iss = unlist(iss_list, use.names = FALSE), - jss = unlist(jss_list, use.names = FALSE) - ) - } else if (adjBeta_method == 5) { - # 1) Create a logical mask for known parents - mask <- !is.na(ped$momID) & !is.na(ped$dadID) - - # 2) Create a single hash label for each known (momID, dadID) pair - # pairLabel <- paste0(ped$momID[mask], "_", ped$dadID[mask]) - base <- max(ped$ID, na.rm = TRUE) + 1L - pairCode <- ped$momID[mask] + base * ped$dadID[mask] - - # 3) Factor that label => each row with the same (mom,dad) gets the same integer code - childVec <- which(mask) - - # 4) Group children by pairCode, so each group is "all children with that (mom,dad)" - groupList <- split(childVec, pairCode) - - # 5) For each group with >1 children, form pairwise adjacency (i->j) for i != j - iss_list <- vector("list", length(groupList)) - jss_list <- vector("list", length(groupList)) - counter <- 1 - - for (g in groupList) { - k <- length(g) - if (k > 1) { - # We'll form all k^2 combos, then remove the diagonal i=j - # Instead of expand.grid, do rep() calls: - - # v = each child repeated k times - # w = entire group repeated once for each child - v <- rep(g, each = k) # row index - w <- rep(g, times = k) # col index - - keep <- (v != w) # remove diagonal where v == w - iss_list[[counter]] <- v[keep] - jss_list[[counter]] <- w[keep] - counter <- counter + 1 - } - } - - list_of_adjacency <- list( - iss = unlist(iss_list, use.names = FALSE), - jss = unlist(jss_list, use.names = FALSE) - ) - } else { - list_of_adjacency <- .adjIndexed( - ped = ped, component = component, - saveable = saveable, resume = resume, - save_path = save_path, verbose = verbose, - lastComputed = lastComputed, nr = nr, - checkpoint_files = checkpoint_files, - update_rate = update_rate, parList = parList, - lens = lens, save_rate_parlist = save_rate_parlist - ) - } - return(list_of_adjacency) -} diff --git a/R/constructAdjacency.R b/R/constructAdjacency.R new file mode 100644 index 00000000..cc3a4e7f --- /dev/null +++ b/R/constructAdjacency.R @@ -0,0 +1,529 @@ + +.adjLoop <- function(ped, component, saveable, resume, + save_path, verbose, lastComputed, + nr, checkpoint_files, update_rate, + parList, lens, save_rate_parlist, + ...) { + # Loop through each individual in the pedigree + # Build the adjacency matrix for parent-child relationships + # Is person in column j the parent of the person in row i? .5 for yes, 0 for no. + ped$momID <- as.numeric(ped$momID) + ped$dadID <- as.numeric(ped$dadID) + ped$ID <- as.numeric(ped$ID) + + for (i in (lastComputed + 1):nr) { + x <- ped[i, , drop = FALSE] + # Handle parentage according to the 'component' specified + if (component %in% c("generation", "additive")) { + # Code for 'generation' and 'additive' components + # Checks if is mom of ID or is dad of ID + xID <- as.numeric(x["ID"]) + sMom <- (xID == ped$momID) + sDad <- (xID == ped$dadID) + val <- sMom | sDad + val[is.na(val)] <- FALSE + } else if (component %in% c("common nuclear")) { + # Code for 'common nuclear' component + # IDs have the Same mom and Same dad + sMom <- (as.numeric(x["momID"]) == ped$momID) + sMom[is.na(sMom)] <- FALSE + sDad <- (as.numeric(x["dadID"]) == ped$dadID) + sDad[is.na(sDad)] <- FALSE + val <- sMom & sDad + } else if (component %in% c("mitochondrial")) { + # Code for 'mitochondrial' component + val <- (as.numeric(x["ID"]) == ped$momID) + val[is.na(val)] <- FALSE + } else { + stop("Unknown relatedness component requested") + } + # Storing the indices of the parent-child relationships + # Keep track of indices only, and then initialize a single sparse matrix + wv <- which(val) + parList[[i]] <- wv + lens[i] <- length(wv) + # Print progress if verbose is TRUE + if (verbose && (i %% update_rate == 0)) { + cat(paste0("Done with ", i, " of ", nr, "\n")) + } + # Checkpointing every save_rate iterations + if (saveable && (i %% save_rate_parlist == 0)) { + saveRDS(parList, file = checkpoint_files$parList) + saveRDS(lens, file = checkpoint_files$lens) + if (verbose) cat("Checkpointed parlist saved at iteration", i, "\n") + } + } + jss <- rep(1L:nr, times = lens) + iss <- unlist(parList) + list_of_adjacency <- list(iss = iss, jss = jss) + return(list_of_adjacency) +} + +.adjIndexed <- function(ped, component, saveable, resume, + save_path, verbose, lastComputed, + nr, checkpoint_files, update_rate, + parList, lens, save_rate_parlist) { + # Loop through each individual in the pedigree + # Build the adjacency matrix for parent-child relationships + # Is person in column j the parent of the person in row i? .5 for yes, 0 for no. + + # Convert IDs + ped$ID <- as.numeric(ped$ID) + ped$momID <- as.numeric(ped$momID) + ped$dadID <- as.numeric(ped$dadID) + + # parent-child lookup + mom_index <- match(ped$momID, ped$ID, nomatch = 0) + dad_index <- match(ped$dadID, ped$ID, nomatch = 0) + + for (i in (lastComputed + 1):nr) { + if (component %in% c("generation", "additive")) { + sMom <- (mom_index == i) + sDad <- (dad_index == i) + val <- sMom | sDad + } else if (component %in% c("common nuclear")) { + # Code for 'common nuclear' component + # IDs have the Same mom and Same dad + sMom <- (ped$momID[i] == ped$momID) + sMom[is.na(sMom)] <- FALSE + sDad <- (ped$dadID[i] == ped$dadID) + sDad[is.na(sDad)] <- FALSE + val <- sMom & sDad + } else if (component %in% c("mitochondrial")) { + val <- (mom_index == i) + } else { + stop("Unknown relatedness component requested") + } + + val[is.na(val)] <- FALSE + parList[[i]] <- which(val) + lens[i] <- length(parList[[i]]) + + # Print progress if verbose is TRUE + if (verbose && (i %% update_rate == 0)) { + cat(paste0("Done with ", i, " of ", nr, "\n")) + } + + # Checkpointing every save_rate iterations + if (saveable && (i %% save_rate_parlist == 0)) { + saveRDS(parList, file = checkpoint_files$parList) + saveRDS(lens, file = checkpoint_files$lens) + if (verbose) cat("Checkpointed parlist saved at iteration", i, "\n") + } + } + jss <- rep(1L:nr, times = lens) + iss <- unlist(parList) + list_of_adjacency <- list(iss = iss, jss = jss) + return(list_of_adjacency) +} + +.adjDirect <- function(ped, component, saveable, resume, + save_path, verbose, lastComputed, + nr, checkpoint_files, update_rate, + parList, lens, save_rate_parlist, adjBeta_method, + ...) { + # Loop through each individual in the pedigree + # Build the adjacency matrix for parent-child relationships + # Is person in column j the parent of the person in row i? .5 for yes, 0 for no. + uniID <- ped$ID # live dangerously without sort(unique(ped$ID)) + ped$ID <- as.numeric(factor(ped$ID, levels = uniID)) + ped$momID <- as.numeric(factor(ped$momID, levels = uniID)) + ped$dadID <- as.numeric(factor(ped$dadID, levels = uniID)) + + if (component %in% c("generation", "additive")) { + mIDs <- stats::na.omit(data.frame(rID = ped$ID, cID = ped$momID)) + dIDs <- stats::na.omit(data.frame(rID = ped$ID, cID = ped$dadID)) + iss <- c(mIDs$rID, dIDs$rID) + jss <- c(mIDs$cID, dIDs$cID) + } else if (component %in% c("common nuclear")) { + # message("Common Nuclear component is not yet implemented for direct method. Using index method.\n") + + # 1) Create a logical mask for only known parents + mask <- !is.na(ped$momID) & !is.na(ped$dadID) + + # 2) Create a single hash label for each known (momID, dadID) pair + base <- max(ped$ID, na.rm = TRUE) + 1L + pairCode <- ped$momID[mask] + base * ped$dadID[mask] + + # 3) Factor that label => each row with the same (mom,dad) gets the same integer code + childVec <- which(mask) + + # 4) Group children by pairCode, so each group is "all children with that (mom,dad)" + groupList <- split(childVec, pairCode) + + # 5) For each group with >1 children, form pairwise adjacency (i->j) for i != j + iss_list <- vector("list", length(groupList)) + jss_list <- vector("list", length(groupList)) + counter <- 1 + + for (g in groupList) { + k <- length(g) + if (k > 1) { + # We'll form all k^2 combos, then remove the diagonal i=j + # rep() calls faster than expand.grid + + # v = each child repeated k times + # w = entire group repeated once for each child + v <- rep(g, each = k) # row index + w <- rep(g, times = k) # col index + + keep <- (v != w) # remove diagonal where v == w + iss_list[[counter]] <- v[keep] + jss_list[[counter]] <- w[keep] + counter <- counter + 1 + } + } + + + iss <- unlist(iss_list, use.names = FALSE) + jss <- unlist(jss_list, use.names = FALSE) + + # list_of_adjacency <- .adjBeta(ped=ped,adjBeta_method=adjBeta_method, + # component = component, + # saveable = saveable, resume = resume, + # save_path = save_path, verbose = verbose, + # lastComputed = lastComputed, nr = nr, + # checkpoint_files = checkpoint_files, + # update_rate = update_rate, + # parList = parList, + # lens = lens, save_rate_parlist = save_rate_parlist, + # ...) + + # return(list_of_adjacency) + } else if (component %in% c("mitochondrial")) { + mIDs <- stats::na.omit(data.frame(rID = ped$ID, cID = ped$momID)) + iss <- c(mIDs$rID) + jss <- c(mIDs$cID) + } else { + stop("Unknown relatedness component requested") + } + list_of_adjacency <- list( + iss = iss, + jss = jss + ) + return(list_of_adjacency) +} + +#' Compute Parent Adjacency Matrix with Multiple Approaches +#' @inheritParams ped2com +#' @inherit ped2com details +#' @param nr the number of rows in the pedigree dataset +#' @param lastComputed the last computed index +#' @param parList a list of parent-child relationships +#' @param lens a vector of the lengths of the parent-child relationships +#' @param checkpoint_files a list of checkpoint files + +compute_parent_adjacency <- function(ped, component, + adjacency_method = "direct", + saveable, resume, + save_path, verbose = FALSE, + lastComputed = 0, nr, checkpoint_files, update_rate, + parList, lens, save_rate_parlist, adjBeta_method = NULL, + ...) { + if (adjacency_method == "loop") { + if (lastComputed < nr) { # Original version + list_of_adjacency <- .adjLoop( + ped = ped, + component = component, + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + lastComputed = lastComputed, + nr = nr, + checkpoint_files = checkpoint_files, + update_rate = update_rate, + parList = parList, + lens = lens, + save_rate_parlist = save_rate_parlist, + ... + ) + } + } else if (adjacency_method == "indexed") { # Garrison version + if (lastComputed < nr) { + list_of_adjacency <- .adjIndexed( + ped = ped, + component = component, + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + lastComputed = lastComputed, + nr = nr, + checkpoint_files = checkpoint_files, + update_rate = update_rate, + parList = parList, + lens = lens, + save_rate_parlist = save_rate_parlist, + ... + ) + } + } else if (adjacency_method == "direct") { # Hunter version + if (lastComputed < nr) { + list_of_adjacency <- .adjDirect( + ped = ped, + component = component, + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + lastComputed = lastComputed, + nr = nr, + checkpoint_files = checkpoint_files, + update_rate = update_rate, + parList = parList, + lens = lens, + save_rate_parlist = save_rate_parlist, + ... + ) + } + } else if (adjacency_method == "beta") { + list_of_adjacency <- .adjBeta( + ped = ped, + adjBeta_method = adjBeta_method, + component = component, + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + lastComputed = lastComputed, + nr = nr, + checkpoint_files = checkpoint_files, + update_rate = update_rate, + parList = parList, + lens = lens, + save_rate_parlist = save_rate_parlist, + ... + ) + } else { + stop("Invalid method specified. Choose from 'loop', 'direct', 'indexed', or beta") + } + if (saveable) { + saveRDS(parList, file = checkpoint_files$parList) + saveRDS(lens, file = checkpoint_files$lens) + if (verbose) { + cat("Final checkpoint saved for adjacency matrix.\n") + } + } + return(list_of_adjacency) +} + + +#' Determine isChild Status, isChild is the 'S' matrix from RAM +#' @param isChild_method method to determine isChild status +#' @param ped pedigree data frame +#' @return isChild 'S' matrix +#' + +isChild <- function(isChild_method, ped) { + if (isChild_method == "partialparent") { + isChild <- apply(ped[, c("momID", "dadID")], 1, function(x) { + .5 + .25 * sum(is.na(x)) # 2 parents -> .5, 1 parent -> .75, 0 parents -> 1 + }) + } else { + isChild <- apply(ped[, c("momID", "dadID")], 1, function(x) { + 2^(-!all(is.na(x))) + }) + } +} + + +.adjBeta <- function(ped, component, + adjBeta_method = 5, + parList = NULL, + lastComputed = 0, + nr = NULL, + lens = NULL, + saveable = FALSE, + resume = FALSE, + save_path = NULL, + verbose = FALSE, + save_rate_parlist = NULL, + update_rate = NULL, + checkpoint_files = NULL, + ...) { # 1) Pairwise compare mother IDs + if (adjBeta_method == 1) { + # gets slow when data are bigger. much slower than indexed + momMatch <- outer(ped$momID, ped$momID, FUN = "==") + momMatch[is.na(momMatch)] <- FALSE + + # 2) Pairwise compare father IDs + dadMatch <- outer(ped$dadID, ped$dadID, FUN = "==") + dadMatch[is.na(dadMatch)] <- FALSE + + # 3) Sibling adjacency if both mom & dad match + adj <- momMatch & dadMatch + + # 4) Extract indices where adj[i,j] is TRUE + w <- which(adj, arr.ind = TRUE) + # iss <- w[, 1] + # jss <- w[, 2] + # + list_of_adjacency <- list( + iss = w[, 1], + jss = w[, 2] + ) + } else if (adjBeta_method == 2) { + # 1) Create a logical mask for known parents + mask <- !is.na(ped$momID) & !is.na(ped$dadID) + + # 2) Create a single string label for each known (momID, dadID) pair + pairLabel <- paste0(ped$momID[mask], "_", ped$dadID[mask]) + + # 3) Factor that label => each row with the same (mom,dad) gets the same integer code + # This is "creating a new ID" for each unique parent pair + pairCode <- match(pairLabel, unique(pairLabel)) + + # childVec are the row indices in 'ped' that have known parents + childVec <- which(mask) # length(childVec) = sum(mask) + + # 4) Group children by pairCode, so each group is "all children with that (mom,dad)" + groupList <- split(childVec, pairCode) + + # 5) For each group with >1 children, form pairwise adjacency i->j + iss_list <- list() + jss_list <- list() + counter <- 1 + + for (g in groupList) { + if (length(g) > 1) { + combos <- expand.grid(g, g, KEEP.OUT.ATTRS = FALSE) + combos <- combos[combos[, 1] != combos[, 2], , drop = FALSE] + iss_list[[counter]] <- combos[, 1] + jss_list[[counter]] <- combos[, 2] + counter <- counter + 1 + } + } + # iss <- unlist(iss_list, use.names = FALSE) + # jss <- unlist(jss_list, use.names = FALSE) + + list_of_adjacency <- list( + iss = unlist(iss_list, use.names = FALSE), + jss = unlist(jss_list, use.names = FALSE) + ) + } else if (adjBeta_method == 3) { + nr <- nrow(ped) + # terrible + # Define a scalar-checking function: + f_check <- function(i, j) { + # i, j are each single integers + # Return one boolean: do they share both parents? + !is.na(ped$momID[i]) && !is.na(ped$dadID[i]) && + !is.na(ped$momID[j]) && !is.na(ped$dadID[j]) && + (ped$momID[i] == ped$momID[j]) && + (ped$dadID[i] == ped$dadID[j]) + } + + # Vectorize it so outer() will produce an nr x nr matrix + vf_check <- Vectorize(f_check) + + # Now outer() calls vf_check(...) in a way that yields scalar results + adj <- outer(seq_len(nr), seq_len(nr), FUN = vf_check) + + # Extract which cells of adj are TRUE + w <- which(adj, arr.ind = TRUE) + # iss <- w[, 1] + # jss <- w[, 2] + + list_of_adjacency <- list( + iss = iss <- w[, 1], + jss = jss <- w[, 2] + ) + } else if (adjBeta_method == 4) { + # 1) Create a logical mask for known parents + mask <- !is.na(ped$momID) & !is.na(ped$dadID) + + # 2) Create a single string label for each known (momID, dadID) pair + pairLabel <- paste0(ped$momID[mask], "_", ped$dadID[mask]) + + # 3) Factor that label => each row with the same (mom,dad) gets the same integer code + pairCode <- match(pairLabel, unique(pairLabel)) + + # childVec are the row indices in 'ped' that have known parents + childVec <- which(mask) + + # 4) Group children by pairCode, so each group is "all children with that (mom,dad)" + groupList <- split(childVec, pairCode) + + # 5) For each group with >1 children, form pairwise adjacency (i->j) for i != j + iss_list <- vector("list", length(groupList)) + jss_list <- vector("list", length(groupList)) + counter <- 1 + + for (g in groupList) { + k <- length(g) + if (k > 1) { + # We'll form all k^2 combos, then remove the diagonal i=j + # Instead of expand.grid, do rep() calls: + + # v = each child repeated k times + # w = entire group repeated once for each child + v <- rep(g, each = k) # row index + w <- rep(g, times = k) # col index + + keep <- (v != w) # remove diagonal where v == w + iss_list[[counter]] <- v[keep] + jss_list[[counter]] <- w[keep] + counter <- counter + 1 + } + } + + list_of_adjacency <- list( + iss = unlist(iss_list, use.names = FALSE), + jss = unlist(jss_list, use.names = FALSE) + ) + } else if (adjBeta_method == 5) { + # 1) Create a logical mask for known parents + mask <- !is.na(ped$momID) & !is.na(ped$dadID) + + # 2) Create a single hash label for each known (momID, dadID) pair + # pairLabel <- paste0(ped$momID[mask], "_", ped$dadID[mask]) + base <- max(ped$ID, na.rm = TRUE) + 1L + pairCode <- ped$momID[mask] + base * ped$dadID[mask] + + # 3) Factor that label => each row with the same (mom,dad) gets the same integer code + childVec <- which(mask) + + # 4) Group children by pairCode, so each group is "all children with that (mom,dad)" + groupList <- split(childVec, pairCode) + + # 5) For each group with >1 children, form pairwise adjacency (i->j) for i != j + iss_list <- vector("list", length(groupList)) + jss_list <- vector("list", length(groupList)) + counter <- 1 + + for (g in groupList) { + k <- length(g) + if (k > 1) { + # We'll form all k^2 combos, then remove the diagonal i=j + # Instead of expand.grid, do rep() calls: + + # v = each child repeated k times + # w = entire group repeated once for each child + v <- rep(g, each = k) # row index + w <- rep(g, times = k) # col index + + keep <- (v != w) # remove diagonal where v == w + iss_list[[counter]] <- v[keep] + jss_list[[counter]] <- w[keep] + counter <- counter + 1 + } + } + + list_of_adjacency <- list( + iss = unlist(iss_list, use.names = FALSE), + jss = unlist(jss_list, use.names = FALSE) + ) + } else { + list_of_adjacency <- .adjIndexed( + ped = ped, component = component, + saveable = saveable, resume = resume, + save_path = save_path, verbose = verbose, + lastComputed = lastComputed, nr = nr, + checkpoint_files = checkpoint_files, + update_rate = update_rate, parList = parList, + lens = lens, save_rate_parlist = save_rate_parlist + ) + } + return(list_of_adjacency) +} From 75b9b4e2b3f09cd75d55b0cd6175e602d736715a Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Mon, 21 Apr 2025 16:45:08 -0400 Subject: [PATCH 09/11] renamed --- .gitignore | 1 + NAMESPACE | 1 + NEWS.md | 1 + R/buildComponent.R | 2 +- R/constructAdjacency.R | 255 ++++---- ...adjacency.Rd => computeParentAdjacency.Rd} | 10 +- man/dot-computeTranspose.Rd | 2 +- man/isChild.Rd | 2 +- man/ped2add.Rd | 2 +- man/ped2ce.Rd | 2 +- man/ped2cn.Rd | 2 +- man/ped2com.Rd | 2 +- man/ped2fam.Rd | 2 +- man/ped2graph.Rd | 2 +- man/ped2maternal.Rd | 2 +- man/ped2mit.Rd | 2 +- man/ped2paternal.Rd | 2 +- vignettes/modelingvariancecomponents.html | 554 ++++++++++++++++++ 18 files changed, 707 insertions(+), 139 deletions(-) rename man/{compute_parent_adjacency.Rd => computeParentAdjacency.Rd} (90%) create mode 100644 vignettes/modelingvariancecomponents.html diff --git a/.gitignore b/.gitignore index 5f04620b..22835dfa 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,4 @@ dataRelatedPairs_new2.csv paper/paper.html tests/testthat/Rplots.pdf vignettes/articles/paper.html +.vscode/settings.json diff --git a/NAMESPACE b/NAMESPACE index b5cff75a..879d7895 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,7 @@ export(checkPedigreeNetwork) export(checkSex) export(com2links) export(comp2vech) +export(computeParentAdjacency) export(createGenDataFrame) export(dropLink) export(evenInsert) diff --git a/NEWS.md b/NEWS.md index c0daa9a2..7693f0eb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,7 @@ * list SimPed and related_coef as aliases for functions * harmonizing function names like calcFamilySize from famSizeCal * implemented adjBeta function to evaluation alternative build method +* reorganize file names to be more consistent # BGmisc 1.3.5.1 * Setting the default for the `sparse` argument in `ped2com()` to TRUE diff --git a/R/buildComponent.R b/R/buildComponent.R index dda09f1f..a920207f 100644 --- a/R/buildComponent.R +++ b/R/buildComponent.R @@ -148,7 +148,7 @@ ped2com <- function(ped, component, iss <- readRDS(checkpoint_files$iss) list_of_adjacencies <- list(iss = iss, jss = jss) } else { - list_of_adjacencies <- compute_parent_adjacency( + list_of_adjacencies <- computeParentAdjacency( ped = ped, save_rate_parlist = save_rate_parlist, checkpoint_files = checkpoint_files, diff --git a/R/constructAdjacency.R b/R/constructAdjacency.R index cc3a4e7f..d0dcdd71 100644 --- a/R/constructAdjacency.R +++ b/R/constructAdjacency.R @@ -204,128 +204,6 @@ return(list_of_adjacency) } -#' Compute Parent Adjacency Matrix with Multiple Approaches -#' @inheritParams ped2com -#' @inherit ped2com details -#' @param nr the number of rows in the pedigree dataset -#' @param lastComputed the last computed index -#' @param parList a list of parent-child relationships -#' @param lens a vector of the lengths of the parent-child relationships -#' @param checkpoint_files a list of checkpoint files - -compute_parent_adjacency <- function(ped, component, - adjacency_method = "direct", - saveable, resume, - save_path, verbose = FALSE, - lastComputed = 0, nr, checkpoint_files, update_rate, - parList, lens, save_rate_parlist, adjBeta_method = NULL, - ...) { - if (adjacency_method == "loop") { - if (lastComputed < nr) { # Original version - list_of_adjacency <- .adjLoop( - ped = ped, - component = component, - saveable = saveable, - resume = resume, - save_path = save_path, - verbose = verbose, - lastComputed = lastComputed, - nr = nr, - checkpoint_files = checkpoint_files, - update_rate = update_rate, - parList = parList, - lens = lens, - save_rate_parlist = save_rate_parlist, - ... - ) - } - } else if (adjacency_method == "indexed") { # Garrison version - if (lastComputed < nr) { - list_of_adjacency <- .adjIndexed( - ped = ped, - component = component, - saveable = saveable, - resume = resume, - save_path = save_path, - verbose = verbose, - lastComputed = lastComputed, - nr = nr, - checkpoint_files = checkpoint_files, - update_rate = update_rate, - parList = parList, - lens = lens, - save_rate_parlist = save_rate_parlist, - ... - ) - } - } else if (adjacency_method == "direct") { # Hunter version - if (lastComputed < nr) { - list_of_adjacency <- .adjDirect( - ped = ped, - component = component, - saveable = saveable, - resume = resume, - save_path = save_path, - verbose = verbose, - lastComputed = lastComputed, - nr = nr, - checkpoint_files = checkpoint_files, - update_rate = update_rate, - parList = parList, - lens = lens, - save_rate_parlist = save_rate_parlist, - ... - ) - } - } else if (adjacency_method == "beta") { - list_of_adjacency <- .adjBeta( - ped = ped, - adjBeta_method = adjBeta_method, - component = component, - saveable = saveable, - resume = resume, - save_path = save_path, - verbose = verbose, - lastComputed = lastComputed, - nr = nr, - checkpoint_files = checkpoint_files, - update_rate = update_rate, - parList = parList, - lens = lens, - save_rate_parlist = save_rate_parlist, - ... - ) - } else { - stop("Invalid method specified. Choose from 'loop', 'direct', 'indexed', or beta") - } - if (saveable) { - saveRDS(parList, file = checkpoint_files$parList) - saveRDS(lens, file = checkpoint_files$lens) - if (verbose) { - cat("Final checkpoint saved for adjacency matrix.\n") - } - } - return(list_of_adjacency) -} - - -#' Determine isChild Status, isChild is the 'S' matrix from RAM -#' @param isChild_method method to determine isChild status -#' @param ped pedigree data frame -#' @return isChild 'S' matrix -#' - -isChild <- function(isChild_method, ped) { - if (isChild_method == "partialparent") { - isChild <- apply(ped[, c("momID", "dadID")], 1, function(x) { - .5 + .25 * sum(is.na(x)) # 2 parents -> .5, 1 parent -> .75, 0 parents -> 1 - }) - } else { - isChild <- apply(ped[, c("momID", "dadID")], 1, function(x) { - 2^(-!all(is.na(x))) - }) - } -} .adjBeta <- function(ped, component, @@ -527,3 +405,136 @@ isChild <- function(isChild_method, ped) { } return(list_of_adjacency) } + + + + + +#' Compute Parent Adjacency Matrix with Multiple Approaches +#' @inheritParams ped2com +#' @inherit ped2com details +#' @param nr the number of rows in the pedigree dataset +#' @param lastComputed the last computed index +#' @param parList a list of parent-child relationships +#' @param lens a vector of the lengths of the parent-child relationships +#' @param checkpoint_files a list of checkpoint files +#' @param update_rate the rate at which to update the progress +#' +#' @export +computeParentAdjacency <- function(ped, component, + adjacency_method = "direct", + saveable, resume, + save_path, + verbose = FALSE, + lastComputed = 0, nr, + checkpoint_files, + update_rate, + parList, lens, save_rate_parlist, + adjBeta_method = NULL, + ...) { + if (adjacency_method == "loop") { + if (lastComputed < nr) { # Original version + list_of_adjacency <- .adjLoop( + ped = ped, + component = component, + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + lastComputed = lastComputed, + nr = nr, + checkpoint_files = checkpoint_files, + update_rate = update_rate, + parList = parList, + lens = lens, + save_rate_parlist = save_rate_parlist, + ... + ) + } + } else if (adjacency_method == "indexed") { # Garrison version + if (lastComputed < nr) { + list_of_adjacency <- .adjIndexed( + ped = ped, + component = component, + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + lastComputed = lastComputed, + nr = nr, + checkpoint_files = checkpoint_files, + update_rate = update_rate, + parList = parList, + lens = lens, + save_rate_parlist = save_rate_parlist, + ... + ) + } + } else if (adjacency_method == "direct") { # Hunter version + if (lastComputed < nr) { + list_of_adjacency <- .adjDirect( + ped = ped, + component = component, + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + lastComputed = lastComputed, + nr = nr, + checkpoint_files = checkpoint_files, + update_rate = update_rate, + parList = parList, + lens = lens, + save_rate_parlist = save_rate_parlist, + ... + ) + } + } else if (adjacency_method == "beta") { + list_of_adjacency <- .adjBeta( + ped = ped, + adjBeta_method = adjBeta_method, + component = component, + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + lastComputed = lastComputed, + nr = nr, + checkpoint_files = checkpoint_files, + update_rate = update_rate, + parList = parList, + lens = lens, + save_rate_parlist = save_rate_parlist, + ... + ) + } else { + stop("Invalid method specified. Choose from 'loop', 'direct', 'indexed', or beta") + } + if (saveable) { + saveRDS(parList, file = checkpoint_files$parList) + saveRDS(lens, file = checkpoint_files$lens) + if (verbose) { + cat("Final checkpoint saved for adjacency matrix.\n") + } + } + return(list_of_adjacency) +} + + +#' Determine isChild Status, isChild is the 'S' matrix from RAM +#' @param isChild_method method to determine isChild status +#' @param ped pedigree data frame +#' @return isChild 'S' matrix +#' + +isChild <- function(isChild_method, ped) { + if (isChild_method == "partialparent") { + isChild <- apply(ped[, c("momID", "dadID")], 1, function(x) { + .5 + .25 * sum(is.na(x)) # 2 parents -> .5, 1 parent -> .75, 0 parents -> 1 + }) + } else { + isChild <- apply(ped[, c("momID", "dadID")], 1, function(x) { + 2^(-!all(is.na(x))) + }) + } +} diff --git a/man/compute_parent_adjacency.Rd b/man/computeParentAdjacency.Rd similarity index 90% rename from man/compute_parent_adjacency.Rd rename to man/computeParentAdjacency.Rd index f6364808..353a655a 100644 --- a/man/compute_parent_adjacency.Rd +++ b/man/computeParentAdjacency.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convertPedigree.R -\name{compute_parent_adjacency} -\alias{compute_parent_adjacency} +% Please edit documentation in R/constructAdjacency.R +\name{computeParentAdjacency} +\alias{computeParentAdjacency} \title{Compute Parent Adjacency Matrix with Multiple Approaches} \usage{ -compute_parent_adjacency( +computeParentAdjacency( ped, component, adjacency_method = "direct", @@ -44,7 +44,7 @@ compute_parent_adjacency( \item{checkpoint_files}{a list of checkpoint files} -\item{update_rate}{numeric. The rate at which to print progress} +\item{update_rate}{the rate at which to update the progress} \item{parList}{a list of parent-child relationships} diff --git a/man/dot-computeTranspose.Rd b/man/dot-computeTranspose.Rd index 38c8fa82..4d90dcbc 100644 --- a/man/dot-computeTranspose.Rd +++ b/man/dot-computeTranspose.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convertPedigree.R +% Please edit documentation in R/buildComponent.R \name{.computeTranspose} \alias{.computeTranspose} \title{Compute the transpose multiplication for the relatedness matrix} diff --git a/man/isChild.Rd b/man/isChild.Rd index 1bd6f1e9..be64125d 100644 --- a/man/isChild.Rd +++ b/man/isChild.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convertPedigree.R +% Please edit documentation in R/constructAdjacency.R \name{isChild} \alias{isChild} \title{Determine isChild Status, isChild is the 'S' matrix from RAM} diff --git a/man/ped2add.Rd b/man/ped2add.Rd index c2179e99..c3949c68 100644 --- a/man/ped2add.Rd +++ b/man/ped2add.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convertPedigree.R +% Please edit documentation in R/buildComponent.R \name{ped2add} \alias{ped2add} \title{Take a pedigree and turn it into an additive genetics relatedness matrix} diff --git a/man/ped2ce.Rd b/man/ped2ce.Rd index b4e3db1d..ed22966a 100644 --- a/man/ped2ce.Rd +++ b/man/ped2ce.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convertPedigree.R +% Please edit documentation in R/buildComponent.R \name{ped2ce} \alias{ped2ce} \title{Take a pedigree and turn it into an extended environmental relatedness matrix} diff --git a/man/ped2cn.Rd b/man/ped2cn.Rd index c738d13b..3178cab6 100644 --- a/man/ped2cn.Rd +++ b/man/ped2cn.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convertPedigree.R +% Please edit documentation in R/buildComponent.R \name{ped2cn} \alias{ped2cn} \title{Take a pedigree and turn it into a common nuclear environmental relatedness matrix} diff --git a/man/ped2com.Rd b/man/ped2com.Rd index 58c0fc47..b49a6b03 100644 --- a/man/ped2com.Rd +++ b/man/ped2com.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convertPedigree.R +% Please edit documentation in R/buildComponent.R \name{ped2com} \alias{ped2com} \title{Take a pedigree and turn it into a relatedness matrix} diff --git a/man/ped2fam.Rd b/man/ped2fam.Rd index 2ff7eb0d..3052e568 100644 --- a/man/ped2fam.Rd +++ b/man/ped2fam.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/buildPedigree.R +% Please edit documentation in R/segmentPedigree.R \name{ped2fam} \alias{ped2fam} \title{Segment Pedigree into Extended Families} diff --git a/man/ped2graph.Rd b/man/ped2graph.Rd index 13c191d7..5e7ac7b1 100755 --- a/man/ped2graph.Rd +++ b/man/ped2graph.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/buildPedigree.R +% Please edit documentation in R/segmentPedigree.R \name{ped2graph} \alias{ped2graph} \title{Turn a pedigree into a graph} diff --git a/man/ped2maternal.Rd b/man/ped2maternal.Rd index e54c3e76..03e02311 100755 --- a/man/ped2maternal.Rd +++ b/man/ped2maternal.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/buildPedigree.R +% Please edit documentation in R/segmentPedigree.R \name{ped2maternal} \alias{ped2maternal} \title{Add a maternal line ID variable to a pedigree} diff --git a/man/ped2mit.Rd b/man/ped2mit.Rd index c19e9ba7..2c7dbcb1 100644 --- a/man/ped2mit.Rd +++ b/man/ped2mit.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/convertPedigree.R +% Please edit documentation in R/buildComponent.R \name{ped2mit} \alias{ped2mit} \alias{ped2mt} diff --git a/man/ped2paternal.Rd b/man/ped2paternal.Rd index 16a9e35a..e893ec03 100755 --- a/man/ped2paternal.Rd +++ b/man/ped2paternal.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/buildPedigree.R +% Please edit documentation in R/segmentPedigree.R \name{ped2paternal} \alias{ped2paternal} \title{Add a paternal line ID variable to a pedigree} diff --git a/vignettes/modelingvariancecomponents.html b/vignettes/modelingvariancecomponents.html new file mode 100644 index 00000000..7b5df889 --- /dev/null +++ b/vignettes/modelingvariancecomponents.html @@ -0,0 +1,554 @@ + + + + + + + + + + + + + + +Modeling variance components + + + + + + + + + + + + + + + + + + + + + + + + + + +

Modeling variance components

+ + + +
+

Introduction

+

This vignette provides a detailed guide to specific functions within +the BGmisc package that aid in the identification and +fitting of variance component models common in behavior genetics. We +will explore key functions such as identifyComponentModel, +providing practical examples and theoretical background. Identification +ensures a unique set of parameters that define the model-implied +covariance matrix, preventing free parameters from trading off one +another.

+
+

Loading Required Libraries

+

Ensure that the BGmisc package is installed and +loaded.

+

Ensure that the following dependencies are installed before +proceeding as they provide us with behavior genetic data and models:

+
    +
  • EasyMx

  • +
  • OpenMx

  • +
+
library(BGmisc)
+library(EasyMx)
+library(OpenMx)
+

Note: If any of the libraries are not installed, you can install them +using install.packages(“package_name”).

+
+
+
+

Working with Variance Component Models

+

In this section, we will demonstrate core functions related to the +identification and fitting of variance component models.

+
+

Using comp2vech Function

+

The comp2vech function is used to vectorize a components +model. The function is often used in conjunction with the identification +process. In this example, we apply it to a list of matrices:

+
comp2vech(list(
+  matrix(c(1, .5, .5, 1), 2, 2),
+  matrix(1, 2, 2)
+))
+#> [1] 1.0 0.5 1.0 1.0 1.0 1.0
+

The result showcases how the matrices have been transformed, +reflecting their role in subsequent variance component analysis.

+
+
+

Using identifyComponentModel Function

+

The identifyComponentModel function helps determine if a +variance components model is identified. It accepts relatedness +component matrices and returns information about identified and +non-identified parameters.

+

Here’s an example using the classical twin model with only MZ +twins:

+
identifyComponentModel(
+  A = list(matrix(1, 2, 2)),
+  C = list(matrix(1, 2, 2)),
+  E = diag(1, 2)
+)
+#> Component model is not identified.
+#> Non-identified parameters are  A, C
+#> $identified
+#> [1] FALSE
+#> 
+#> $nidp
+#> [1] "A" "C"
+

As you can see, the model is not identified. We need to add an +additional group so that we have sufficient information. Let us add the +rest of the classical twin model, in this case DZ twins.

+
identifyComponentModel(
+  A = list(matrix(c(1, .5, .5, 1), 2, 2), matrix(1, 2, 2)),
+  C = list(matrix(1, 2, 2), matrix(1, 2, 2)),
+  E = diag(1, 4)
+)
+#> Component model is identified.
+#> $identified
+#> [1] TRUE
+#> 
+#> $nidp
+#> character(0)
+

As you can see the model is identified, now that we’ve added another +group. Let us confirm by fitting a model. First we prepare the data.

+
require(dplyr)
+#> Loading required package: dplyr
+#> 
+#> Attaching package: 'dplyr'
+#> The following objects are masked from 'package:BGmisc':
+#> 
+#>     between, first, last
+#> The following objects are masked from 'package:stats':
+#> 
+#>     filter, lag
+#> The following objects are masked from 'package:base':
+#> 
+#>     intersect, setdiff, setequal, union
+# require(purrr)
+
+data(twinData, package = "OpenMx")
+selVars <- c("ht1", "ht2")
+
+mzdzData <- subset(
+  twinData, zyg %in% c(1, 3),
+  c(selVars, "zyg")
+)
+
+mzdzData$RCoef <- c(1, NA, .5)[mzdzData$zyg]
+
+
+mzData <- mzdzData %>% filter(zyg == 1)
+

Let us fit the data with MZ twins by themselves.

+
run1 <- emxTwinModel(
+  model = "Cholesky",
+  relatedness = "RCoef",
+  data = mzData,
+  use = selVars,
+  run = TRUE, name = "TwCh"
+)
+#> Running TwCh with 4 parameters
+#> Warning: In model 'TwCh' Optimizer returned a non-zero status code 5. The
+#> Hessian at the solution does not appear to be convex. See
+#> ?mxCheckIdentification for possible diagnosis (Mx status RED).
+
+summary(run1)
+#> Summary of TwCh 
+#>  
+#> The Hessian at the solution does not appear to be convex. See ?mxCheckIdentification for possible diagnosis (Mx status RED). 
+#>  
+#> free parameters:
+#>      name matrix row col   Estimate    Std.Error A lbound ubound
+#> 1 sqrtA11  sqrtA   1   1 0.05122646           NA    1e-06       
+#> 2 sqrtC11  sqrtC   1   1 0.03518629           NA       0!       
+#> 3 sqrtE11  sqrtE   1   1 0.02325722 0.0007017955 !     0!       
+#> 4    Mht1  Means ht1   1 1.62974908 0.0027023907                
+#> 
+#> Model Statistics: 
+#>                |  Parameters  |  Degrees of Freedom  |  Fit (-2lnL units)
+#>        Model:              4                   1112             -3693.148
+#>    Saturated:              5                   1111                    NA
+#> Independence:              4                   1112                    NA
+#> Number of observations/statistics: 569/1116
+#> 
+#> 
+#> ** Information matrix is not positive definite (not at a candidate optimum).
+#>   Be suspicious of these results. At minimum, do not trust the standard errors.
+#> 
+#> Information Criteria: 
+#>       |  df Penalty  |  Parameters Penalty  |  Sample-Size Adjusted
+#> AIC:      -5917.148              -3685.148                -3685.078
+#> BIC:     -10747.543              -3667.773                -3680.471
+#> To get additional fit indices, see help(mxRefModels)
+#> timestamp: 2025-04-21 16:43:57 
+#> Wall clock time: 0.04138112 secs 
+#> optimizer:  SLSQP 
+#> OpenMx version number: 2.21.13 
+#> Need help?  See help(mxSummary)
+

As you can see the model was unsuccessful because it was not +identified. But when we add another group, so that the model is +identified, the model now fits.

+
run2 <- emxTwinModel(
+  model = "Cholesky",
+  relatedness = "RCoef",
+  data = mzdzData,
+  use = selVars,
+  run = TRUE, name = "TwCh"
+)
+#> Running TwCh with 4 parameters
+
+summary(run2)
+#> Summary of TwCh 
+#>  
+#> free parameters:
+#>      name matrix row col   Estimate    Std.Error A lbound ubound
+#> 1 sqrtA11  sqrtA   1   1 0.06339271 0.0014377690    1e-06       
+#> 2 sqrtC11  sqrtC   1   1 0.00000100 0.0250260004 !     0!       
+#> 3 sqrtE11  sqrtE   1   1 0.02330040 0.0007015267       0!       
+#> 4    Mht1  Means ht1   1 1.63295540 0.0020511844                
+#> 
+#> Model Statistics: 
+#>                |  Parameters  |  Degrees of Freedom  |  Fit (-2lnL units)
+#>        Model:              4                   1803             -5507.092
+#>    Saturated:              5                   1802                    NA
+#> Independence:              4                   1803                    NA
+#> Number of observations/statistics: 920/1807
+#> 
+#> Information Criteria: 
+#>       |  df Penalty  |  Parameters Penalty  |  Sample-Size Adjusted
+#> AIC:      -9113.092              -5499.092                -5499.048
+#> BIC:     -17811.437              -5479.794                -5492.498
+#> To get additional fit indices, see help(mxRefModels)
+#> timestamp: 2025-04-21 16:43:57 
+#> Wall clock time: 0.03366184 secs 
+#> optimizer:  SLSQP 
+#> OpenMx version number: 2.21.13 
+#> Need help?  See help(mxSummary)
+
+
+ + + + + + + + + + + From fd71877e195ab1d3b495421e51f20a454301d8c7 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Mon, 21 Apr 2025 17:06:30 -0400 Subject: [PATCH 10/11] switch --- R/buildComponent.R | 42 +++++-- R/constructAdjacency.R | 180 +++++++++++++++-------------- tests/testthat/test-readWikiTree.R | 12 +- 3 files changed, 131 insertions(+), 103 deletions(-) diff --git a/R/buildComponent.R b/R/buildComponent.R index a920207f..b57e90dc 100644 --- a/R/buildComponent.R +++ b/R/buildComponent.R @@ -467,18 +467,38 @@ ped2ce <- function(ped, #' @param r2 a relatedness matrix #' .computeTranspose <- function(r2, transpose_method = "tcrossprod", verbose = FALSE) { - if (!transpose_method %in% c("tcrossprod", "crossprod", "star", "tcross.alt.crossprod", "tcross.alt.star")) { - stop("Invalid method specified. Choose from 'tcrossprod', 'crossprod', or 'star'.") + valid_methods <- c("tcrossprod", "crossprod", "star", + "tcross.alt.crossprod", "tcross.alt.star") + if (!transpose_method %in% valid_methods) { + stop("Invalid method specified. Choose from 'tcrossprod', 'crossprod', 'star', 'tcross.alt.crossprod', or 'tcross.alt.star'.") } - if (transpose_method %in% c("crossprod", "tcross.alt.crossprod")) { - if (verbose) cat("Doing alt tcrossprod crossprod t \n") - return(crossprod(t(as.matrix(r2)))) - } else if (transpose_method %in% c("star", "tcross.alt.star")) { - if (verbose) cat("Doing alt tcrossprod %*% t \n") - return(r2 %*% t(as.matrix(r2))) + + # Map aliases to core methods + alias_map <- c( + "tcross.alt.crossprod" = "crossprod", + "tcross.alt.star" = "star" + ) + + if (transpose_method %in% names(alias_map)) { + method_normalized <- alias_map[[transpose_method]] } else { - if (verbose) cat("Doing tcrossprod\n") - return(Matrix::tcrossprod(r2)) + method_normalized <- transpose_method } -} + result <- switch(method_normalized, + "tcrossprod" = { + if (verbose) cat("Doing tcrossprod\n") + Matrix::tcrossprod(r2) + }, + "crossprod" = { + if (verbose) cat("Doing tcrossprod using crossprod(t(.))\n") + crossprod(t(as.matrix(r2))) + }, + "star" = { + if (verbose) cat("Doing tcrossprod using %*% t(.)\n") + r2 %*% t(as.matrix(r2)) + } + ) + + return(result) +} diff --git a/R/constructAdjacency.R b/R/constructAdjacency.R index d0dcdd71..6a2c337b 100644 --- a/R/constructAdjacency.R +++ b/R/constructAdjacency.R @@ -1,4 +1,3 @@ - .adjLoop <- function(ped, component, saveable, resume, save_path, verbose, lastComputed, nr, checkpoint_files, update_rate, @@ -407,9 +406,6 @@ } - - - #' Compute Parent Adjacency Matrix with Multiple Approaches #' @inheritParams ped2com #' @inherit ped2com details @@ -422,93 +418,105 @@ #' #' @export computeParentAdjacency <- function(ped, component, - adjacency_method = "direct", - saveable, resume, - save_path, + adjacency_method = "direct", + saveable, resume, + save_path, verbose = FALSE, - lastComputed = 0, nr, + lastComputed = 0, nr, checkpoint_files, update_rate, - parList, lens, save_rate_parlist, + parList, lens, save_rate_parlist, adjBeta_method = NULL, - ...) { - if (adjacency_method == "loop") { - if (lastComputed < nr) { # Original version - list_of_adjacency <- .adjLoop( - ped = ped, - component = component, - saveable = saveable, - resume = resume, - save_path = save_path, - verbose = verbose, - lastComputed = lastComputed, - nr = nr, - checkpoint_files = checkpoint_files, - update_rate = update_rate, - parList = parList, - lens = lens, - save_rate_parlist = save_rate_parlist, - ... - ) - } - } else if (adjacency_method == "indexed") { # Garrison version - if (lastComputed < nr) { - list_of_adjacency <- .adjIndexed( - ped = ped, - component = component, - saveable = saveable, - resume = resume, - save_path = save_path, - verbose = verbose, - lastComputed = lastComputed, - nr = nr, - checkpoint_files = checkpoint_files, - update_rate = update_rate, - parList = parList, - lens = lens, - save_rate_parlist = save_rate_parlist, - ... - ) - } - } else if (adjacency_method == "direct") { # Hunter version - if (lastComputed < nr) { - list_of_adjacency <- .adjDirect( - ped = ped, - component = component, - saveable = saveable, - resume = resume, - save_path = save_path, - verbose = verbose, - lastComputed = lastComputed, - nr = nr, - checkpoint_files = checkpoint_files, - update_rate = update_rate, - parList = parList, - lens = lens, - save_rate_parlist = save_rate_parlist, - ... - ) - } - } else if (adjacency_method == "beta") { - list_of_adjacency <- .adjBeta( - ped = ped, - adjBeta_method = adjBeta_method, - component = component, - saveable = saveable, - resume = resume, - save_path = save_path, - verbose = verbose, - lastComputed = lastComputed, - nr = nr, - checkpoint_files = checkpoint_files, - update_rate = update_rate, - parList = parList, - lens = lens, - save_rate_parlist = save_rate_parlist, - ... - ) + ...) { + if (!adjacency_method %in% c("loop", "indexed", "direct", "beta")) { + stop("Invalid method specified. Choose from 'loop', 'direct', 'indexed', or 'beta'.") + } + # For loop/indexed/direct: skip if already complete + if (adjacency_method != "beta" && lastComputed >= nr) { + list_of_adjacency <- NULL } else { - stop("Invalid method specified. Choose from 'loop', 'direct', 'indexed', or beta") + list_of_adjacency <- switch(adjacency_method, + + "loop" = { + # Original version + .adjLoop( + ped = ped, + component = component, + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + lastComputed = lastComputed, + nr = nr, + checkpoint_files = checkpoint_files, + update_rate = update_rate, + parList = parList, + lens = lens, + save_rate_parlist = save_rate_parlist, + ... + ) + }, + + "indexed" = { + # Garrison version + .adjIndexed( + ped = ped, + component = component, + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + lastComputed = lastComputed, + nr = nr, + checkpoint_files = checkpoint_files, + update_rate = update_rate, + parList = parList, + lens = lens, + save_rate_parlist = save_rate_parlist, + ... + ) + }, + + "direct" = { + # Hunter version + .adjDirect( + ped = ped, + component = component, + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + lastComputed = lastComputed, + nr = nr, + checkpoint_files = checkpoint_files, + update_rate = update_rate, + parList = parList, + lens = lens, + save_rate_parlist = save_rate_parlist, + ... + ) + }, + + "beta" = { + .adjBeta( + ped = ped, + adjBeta_method = adjBeta_method, + component = component, + saveable = saveable, + resume = resume, + save_path = save_path, + verbose = verbose, + lastComputed = lastComputed, + nr = nr, + checkpoint_files = checkpoint_files, + update_rate = update_rate, + parList = parList, + lens = lens, + save_rate_parlist = save_rate_parlist, + ... + ) + } + ) } if (saveable) { saveRDS(parList, file = checkpoint_files$parList) diff --git a/tests/testthat/test-readWikiTree.R b/tests/testthat/test-readWikiTree.R index df894c08..e7602e03 100644 --- a/tests/testthat/test-readWikiTree.R +++ b/tests/testthat/test-readWikiTree.R @@ -19,8 +19,8 @@ test_that("traceTreePaths works correctly for horizontal tree", { result <- traceTreePaths(tree_horizontal) # Check the result # Check the result - expect_equal(names(result), c("from_id", "to_id", "path_length", "intermediates","intermediate_values")) - expect_equal(c("A", "B") %in% c(result$from_id, result$to_id), rep(TRUE,2)) + expect_equal(names(result), c("from_id", "to_id", "path_length", "intermediates", "intermediate_values")) + expect_equal(c("A", "B") %in% c(result$from_id, result$to_id), rep(TRUE, 2)) expect_equal(result$path_length[result$from_id == "A" & result$to_id == "B"], 2) expect_equal(result$intermediate_values[result$from_id == "A" & result$to_id == "B"], "+") }) @@ -42,13 +42,12 @@ test_that("traceTreePaths works correctly for vertical tree", { result <- traceTreePaths(tree_spouse_child) # Check the result - expect_equal(names(result), c("from_id", "to_id", "path_length", "intermediates","intermediate_values")) - expect_equal(c("A", "B", "C") %in% c(result$from_id, result$to_id), rep(TRUE,3)) + expect_equal(names(result), c("from_id", "to_id", "path_length", "intermediates", "intermediate_values")) + expect_equal(c("A", "B", "C") %in% c(result$from_id, result$to_id), rep(TRUE, 3)) expect_equal(result$path_length[result$from_id == "A" & result$to_id == "B"], 2) expect_equal(result$path_length[result$from_id == "A" & result$to_id == "C"], 5) expect_equal(result$path_length[result$from_id == "B" & result$to_id == "C"], 5) expect_equal(result$intermediate_values[result$from_id == "A" & result$to_id == "B"], "+") - }) @@ -83,7 +82,8 @@ test_that("readWikifamilytree reads a string correctly", { expect_equal( result2$summary, - result$summary) + result$summary + ) }) From 05676d15e090d584ed6fa05e8e5b0ab6929620bf Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Mon, 21 Apr 2025 17:51:44 -0400 Subject: [PATCH 11/11] rename --- R/{computeRelatedness.R => calculateRelatedness.R} | 1 - 1 file changed, 1 deletion(-) rename R/{computeRelatedness.R => calculateRelatedness.R} (99%) diff --git a/R/computeRelatedness.R b/R/calculateRelatedness.R similarity index 99% rename from R/computeRelatedness.R rename to R/calculateRelatedness.R index 3fa91ffd..d1da241d 100644 --- a/R/computeRelatedness.R +++ b/R/calculateRelatedness.R @@ -107,7 +107,6 @@ inferRelatedness <- function(obsR, aceA = .9, aceC = 0, sharedC = 0) { } #' @rdname inferRelatedness -#' @export relatedness <- function(...) { warning("The 'relatedness' function is deprecated. Please use 'inferRelatedness' instead.") inferRelatedness(...)