From 703786e26d788865270b23336a564e321eade000 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Tue, 22 Apr 2025 10:16:55 -0400 Subject: [PATCH] renamed 1.3.6 to 1.4.0 --- DESCRIPTION | 2 +- NEWS.md | 3 +- R/buildComponent.R | 5 +- R/checkParents.R | 78 +++------------ R/checkSex.R | 137 ++++++++++++++++++++------ R/cleanPedigree.R | 2 +- R/constructAdjacency.R | 17 +--- R/tweakPedigree.R | 4 +- tests/testthat/test-convertPedigree.R | 2 +- tests/testthat/test-tweakPedigree.R | 14 +-- 10 files changed, 137 insertions(+), 127 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index bcdab09f..a81d4686 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: BGmisc Title: An R Package for Extended Behavior Genetics Analysis -Version: 1.3.6 +Version: 1.4.0 Authors@R: c( person("S. Mason", "Garrison", , "garrissm@wfu.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-4804-6003")), diff --git a/NEWS.md b/NEWS.md index 7693f0eb..7d508905 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# BGmisc 1.3.6 +# BGmisc 1.4.0 * revived checkParents function to check for handling phantom parents and missing parents * added tests for checkParents function * added GoT analysis @@ -8,6 +8,7 @@ * harmonizing function names like calcFamilySize from famSizeCal * implemented adjBeta function to evaluation alternative build method * reorganize file names to be more consistent +* harmonized famID # BGmisc 1.3.5.1 * Setting the default for the `sparse` argument in `ped2com()` to TRUE diff --git a/R/buildComponent.R b/R/buildComponent.R index b57e90dc..8c1da06e 100644 --- a/R/buildComponent.R +++ b/R/buildComponent.R @@ -120,6 +120,7 @@ ped2com <- function(ped, component, } # Step 1: Construct parent-child adjacency matrix + ## A. Resume from Checkpoint if Needed if (resume && file.exists(checkpoint_files$parList) && file.exists(checkpoint_files$lens)) { if (verbose) cat("Resuming: Loading parent-child adjacency data...\n") @@ -139,7 +140,6 @@ ped2com <- function(ped, component, ## B. Resume loop from the next uncomputed index - if (verbose) cat("Computing parent-child adjacency matrix...\n") # Construct sparse matrix if (resume && file.exists(checkpoint_files$iss) && file.exists(checkpoint_files$jss)) { # fix to check actual @@ -234,6 +234,8 @@ ped2com <- function(ped, component, } } # --- Step 2: Compute Relatedness Matrix --- + + if (resume && file.exists(checkpoint_files$r_checkpoint) && file.exists(checkpoint_files$gen_checkpoint) && file.exists(checkpoint_files$mtSum_checkpoint) && file.exists(checkpoint_files$newIsPar_checkpoint) && file.exists(checkpoint_files$count_checkpoint) ) { @@ -320,7 +322,6 @@ ped2com <- function(ped, component, } } - if (component == "mitochondrial") { r@x <- rep(1, length(r@x)) # Assign 1 to all nonzero elements for mitochondrial component diff --git a/R/checkParents.R b/R/checkParents.R index 43c8a924..18f8a17a 100644 --- a/R/checkParents.R +++ b/R/checkParents.R @@ -13,6 +13,7 @@ #' @param addphantoms A logical flag indicating whether to add phantom parents for missing parent IDs. #' @param parentswithoutrow A logical flag indicating whether to add parents without a row in the pedigree. #' +#' #' @return Depending on the value of `repair`, either a list containing validation results or a repaired dataframe is returned. #' @examples #' \dontrun{ @@ -25,10 +26,8 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, addphantoms = repair, parentswithoutrow = repair) { # Standardize column names in the input dataframe - ped_og <- ped ped <- standardizeColnames(ped, verbose = verbose) - # Initialize a list to store validation results validation_results <- list() @@ -36,6 +35,7 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, cat("Step 1: Checking for single parents...\n") } + # Identify missing fathers and mothers missing_fathers <- ped$ID[which(is.na(ped$dadID) & !is.na(ped$momID))] missing_mothers <- ped$ID[which(!is.na(ped$dadID) & is.na(ped$momID))] @@ -79,69 +79,17 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE, cat("Step 2: Determining the if moms are the same sex and dads are same sex\n") } # Determine modal sex values for moms and dads - mom_sexes <- ped$sex[ped$ID %in% ped$momID] - dad_sexes <- ped$sex[ped$ID %in% ped$dadID] - - # Determine the most frequent sex for moms and dads - most_frequent_sex_mom <- names(sort(table(mom_sexes), decreasing = TRUE))[1] - most_frequent_sex_dad <- names(sort(table(dad_sexes), decreasing = TRUE))[1] - - # are all moms/dads the same sex? - validation_results$mom_sex <- unique(mom_sexes) - validation_results$dad_sex <- unique(dad_sexes) - - # Store the most frequent sex for moms and dads - if (is.numeric(ped$sex)) { - validation_results$female_var <- as.numeric(most_frequent_sex_mom) - validation_results$male_var <- as.numeric(most_frequent_sex_dad) - } else if (is.character(ped$sex) | is.factor(ped$sex)) { - validation_results$female_var <- most_frequent_sex_mom - validation_results$male_var <- most_frequent_sex_dad - } else { - print("You should never see this. If you do, then you have a problem with the data type of the sex variable") - } - - # verbose - if (length(validation_results$mom_sex) == 1) { - if (verbose) { - cat(paste0( - "All moms are '", - validation_results$female_var, - "'.\n" - )) - } - validation_results$female_moms <- TRUE - } else { - validation_results$female_moms <- FALSE - } - - if (length(validation_results$dad_sex) == 1) { - if (verbose) { - cat(paste0( - "All dads are '", - validation_results$male_var, - "'.\n" - )) - } - validation_results$male_dads <- TRUE - } else { - validation_results$male_dads <- FALSE - } - # Check for inconsistent sex - wrong_sex_moms <- ped$ID[which(ped$sex[ped$ID %in% ped$momID] != validation_results$female_var)] - wrong_sex_dads <- ped$ID[which(ped$sex[ped$ID %in% ped$dadID] != validation_results$male_var)] - - validation_results$wrong_sex_moms <- wrong_sex_moms - validation_results$wrong_sex_dads <- wrong_sex_dads - - if (verbose) { - if (length(wrong_sex_moms) > 0) { - cat("Some individuals listed as moms are not coded as", validation_results$female_var, "\n") - } - if (length(wrong_sex_dads) > 0) { - cat("Some individuals listed as dads are not coded as", validation_results$male_var, "\n") - } - } + mom_results <- checkParentSex(ped, parent_col = "momID", verbose = verbose) + dad_results <- checkParentSex(ped, parent_col = "dadID", verbose = verbose) + + validation_results$mom_sex <- mom_results$unique_sexes + validation_results$dad_sex <- dad_results$unique_sexes + validation_results$female_var <- mom_results$modal_sex + validation_results$male_var <- dad_results$modal_sex + validation_results$wrong_sex_moms <- mom_results$inconsistent_parents + validation_results$wrong_sex_dads <- dad_results$inconsistent_parents + validation_results$female_moms <- mom_results$all_same_sex + validation_results$male_dads <- dad_results$all_same_sex # Are any parents in both momID and dadID? momdad <- intersect(ped$dadID, ped$momID) diff --git a/R/checkSex.R b/R/checkSex.R index 8fbabf6a..2c729760 100644 --- a/R/checkSex.R +++ b/R/checkSex.R @@ -35,7 +35,10 @@ #' } #' @export #' -checkSex <- function(ped, code_male = NULL, code_female = NULL, verbose = FALSE, repair = FALSE) { +checkSex <- function(ped, code_male = NULL, code_female = NULL, verbose = FALSE, repair = FALSE, + momID = "momID", + dadID = "dadID" + ) { # Standardize column names in the input dataframe ped <- standardizeColnames(ped, verbose = verbose) @@ -53,36 +56,31 @@ checkSex <- function(ped, code_male = NULL, code_female = NULL, verbose = FALSE, validation_results$sex_unique <- unique(ped$sex) validation_results$sex_length <- length(unique(ped$sex)) if (verbose) { - cat(paste0(validation_results$sex_length, " unique values found.\n")) - cat(paste0("Unique values: ", paste0(validation_results$sex_unique, collapse = ", "), "\n")) + cat(validation_results$sex_length, " unique sex codes found: ", paste(validation_results$sex_unique, collapse = ", "), "\n") } - # Are there multiple sexes/genders in the list of dads and moms? - table_sex_dad <- sort(table(ped$sex[ped$ID %in% ped$dadID]), decreasing = TRUE) - table_sex_mom <- sort(table(ped$sex[ped$ID %in% ped$momID]), decreasing = TRUE) - validation_results$all_sex_dad <- names(table_sex_dad) - validation_results$all_sex_mom <- names(table_sex_mom) - validation_results$most_frequent_sex_dad <- validation_results$all_sex_dad[1] - validation_results$most_frequent_sex_mom <- validation_results$all_sex_mom[1] + # Are there multiple sexes/genders in the list of dads and moms? - # List ids for dads that are female, moms that are male - if (length(validation_results$all_sex_dad) > 1) { - df_dads <- ped[ped$ID %in% ped$dadID, ] - validation_results$ID_female_dads <- df_dads$ID[df_dads$sex != validation_results$most_frequent_sex_dad] - validation_results$ID_child_female_dads <- ped$ID[ped$dadID %in% validation_results$ID_female_dads] - remove(df_dads) - } - if (length(validation_results$all_sex_mom) > 1) { - df_moms <- ped[ped$ID %in% ped$momID, ] - validation_results$ID_male_moms <- df_moms$ID[df_moms$sex != validation_results$most_frequent_sex_mom] - validation_results$ID_child_male_moms <- ped$ID[ped$momID %in% validation_results$ID_male_moms] - remove(df_moms) - } + dad_results <- checkParentSex(ped, parent_col = dadID, verbose = verbose) + mom_results <- checkParentSex(ped, parent_col = momID, verbose = verbose) - if (repair) { - if (verbose) { + validation_results$all_sex_dad <- dad_results$unique_sexes + validation_results$all_sex_mom <- mom_results$unique_sexes + validation_results$most_frequent_sex_dad <- dad_results$modal_sex + validation_results$most_frequent_sex_mom <- mom_results$modal_sex + validation_results$ID_female_dads <- dad_results$inconsistent_parents + validation_results$ID_child_female_dads <- dad_results$inconsistent_children + validation_results$ID_male_moms <- mom_results$inconsistent_parents + validation_results$ID_child_male_moms <- mom_results$inconsistent_children + + if (repair==FALSE) { + if (verbose) { cat("Checks Made:\n") + print(validation_results) } + return(validation_results) + } else { + if (verbose==TRUE) { cat("Step 2: Attempting to repair sex coding...\n") } # Initialize a list to track changes made during repair @@ -107,13 +105,6 @@ checkSex <- function(ped, code_male = NULL, code_female = NULL, verbose = FALSE, print(changes) } return(ped) - } else { - if (verbose) { - cat("Checks Made:\n") - print(validation_results) - } - - return(validation_results) } } @@ -193,3 +184,85 @@ recodeSex <- function( } return(ped) } + + + +#' Check Parental Role Sex Consistency +#' +#' Validates sex coding consistency for a given parental role (momID or dadID). +#' +#' @param ped Pedigree dataframe. +#' @param parent_col The column name for parent IDs ("momID" or "dadID"). +#' @param sex_col The column name for sex coding. Default is "sex". +#' @param verbose Logical, whether to print messages. +#' +#' @return A list containing role, unique sex codes, modal sex, inconsistent parents, and linked children. +checkParentSex <- function(ped, parent_col, sex_col = "sex", verbose = FALSE) { + + + parent_ids <- ped[[parent_col]] + parent_rows <- ped[ped$ID %in% parent_ids, ] + + if (nrow(parent_rows) == 0) { + if (verbose) cat(paste0("No individuals found in role: ", parent_col, "\n")) + return(list( + role = parent_col, + unique_sexes = NULL, + modal_sex = NA, + all_same_sex = NA, + inconsistent_parents = NULL, + inconsistent_children = NULL + )) + } + + + # Are there multiple sexes/genders in the list of dads and moms? + parent_sexes <- parent_rows[[sex_col]] + unique_sexes <- unique(parent_sexes) + + # are all moms/dads the same sex? + all_same_sex <- length(unique_sexes) == 1 + + # Store the most frequent sex for moms and dads + modal_sex <- names(sort(table(parent_sexes), decreasing = TRUE))[1] + + # Type coercion based on ped$sex type + if (is.numeric(ped[[sex_col]])) { + modal_sex <- as.numeric(modal_sex) + } + + # List ids for dads that are female, moms that are male + inconsistent_parents <- parent_rows$ID[parent_rows[[sex_col]] != modal_sex] + + child_col <- parent_col + inconsistent_children <- ped$ID[ped[[child_col]] %in% inconsistent_parents] + + + if (verbose) { + cat(paste0("Role: ", parent_col, "\n")) + cat(length(unique_sexes), " unique sex codes found: ", paste(unique_sexes, collapse = ", "), "\n") + cat("Modal sex code: ", modal_sex, "\n") + + if (all_same_sex) { + cat("All parents consistently coded.\n") + } else cat(length(inconsistent_parents), " parents have inconsistent sex coding.\n") + } + + return(list( + role = parent_col, + unique_sexes = unique_sexes, + modal_sex = modal_sex, + all_same_sex = all_same_sex, + inconsistent_parents = inconsistent_parents, + inconsistent_children = inconsistent_children + )) +} + +#' Get the Modal Value of a Vector + +.getModalValue <- function(x) { + if (length(na.omit(x)) == 0) return(NA) + freq_table <- sort(table(x), decreasing = TRUE) + modal <- names(freq_table)[1] + return(modal) +} diff --git a/R/cleanPedigree.R b/R/cleanPedigree.R index 737b558c..46a3a64a 100644 --- a/R/cleanPedigree.R +++ b/R/cleanPedigree.R @@ -13,7 +13,7 @@ standardizeColnames <- function(df, verbose = FALSE) { # Internal mapping of standardized names to possible variants mapping <- list( - "fam" = "^(?:fam(?:ily)?[\\.\\-_]?(?:id)?)", + "famID" = "^(?:fam(?:ily)?[\\.\\-_]?(?:id)?)", "ID" = "^(?:i(?:d$|ndiv(?:idual)?)|p(?:erson)?[\\.\\-_]?id)", "gen" = "^(?:gen(?:s|eration)?)", "dadID" = "^(?:d(?:ad)?id|paid|fatherid|pid[\\.\\-_]?fath[er]*|sire)", diff --git a/R/constructAdjacency.R b/R/constructAdjacency.R index 6a2c337b..a1d6fd66 100644 --- a/R/constructAdjacency.R +++ b/R/constructAdjacency.R @@ -119,7 +119,7 @@ .adjDirect <- function(ped, component, saveable, resume, save_path, verbose, lastComputed, nr, checkpoint_files, update_rate, - parList, lens, save_rate_parlist, adjBeta_method, + parList, lens, save_rate_parlist, ...) { # Loop through each individual in the pedigree # Build the adjacency matrix for parent-child relationships @@ -177,18 +177,6 @@ iss <- unlist(iss_list, use.names = FALSE) jss <- unlist(jss_list, use.names = FALSE) - # list_of_adjacency <- .adjBeta(ped=ped,adjBeta_method=adjBeta_method, - # component = component, - # saveable = saveable, resume = resume, - # save_path = save_path, verbose = verbose, - # lastComputed = lastComputed, nr = nr, - # checkpoint_files = checkpoint_files, - # update_rate = update_rate, - # parList = parList, - # lens = lens, save_rate_parlist = save_rate_parlist, - # ...) - - # return(list_of_adjacency) } else if (component %in% c("mitochondrial")) { mIDs <- stats::na.omit(data.frame(rID = ped$ID, cID = ped$momID)) iss <- c(mIDs$rID) @@ -459,8 +447,7 @@ computeParentAdjacency <- function(ped, component, "indexed" = { # Garrison version - .adjIndexed( - ped = ped, + .adjIndexed(ped = ped, component = component, saveable = saveable, resume = resume, diff --git a/R/tweakPedigree.R b/R/tweakPedigree.R index d617851b..52b67a30 100644 --- a/R/tweakPedigree.R +++ b/R/tweakPedigree.R @@ -15,7 +15,7 @@ makeTwins <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_twin = 2, verbose = FALSE) { # Check if the ped is the same format as the output of simulatePedigree if (paste0(colnames(ped), collapse = "") != paste0(c( - "fam", "ID", "gen", + "famID", "ID", "gen", "dadID", "momID", "spID", "sex" ), collapse = "")) { ped <- standardizeColnames(ped, verbose = verbose) @@ -128,7 +128,7 @@ makeInbreeding <- function(ped, if (paste0(colnames(ped), collapse = "" ) != paste0( - c("fam", "ID", "gen", "dadID", "momID", "spID", "sex"), + c("famID", "ID", "gen", "dadID", "momID", "spID", "sex"), collapse = "" )) { ped <- standardizeColnames(ped, verbose = verbose) diff --git a/tests/testthat/test-convertPedigree.R b/tests/testthat/test-convertPedigree.R index a011c334..c927d2ad 100644 --- a/tests/testthat/test-convertPedigree.R +++ b/tests/testthat/test-convertPedigree.R @@ -37,7 +37,7 @@ test_that("ped2add produces correct matrix dims, values, and dimnames for altern expect_equal(dn[[1]], dn[[2]]) expect_equal(dn[[1]], as.character(hazard$ID)) }) -# to do, combine the sets that are equalivant. shouldn't need to run 1000 expect equals +# to do, combine the sets that are equivalent. shouldn't need to run 1000 expect equals test_that("ped2add produces correct matrix dims, values, and dimnames for inbreeding data", { tolerance <- 1e-10 diff --git a/tests/testthat/test-tweakPedigree.R b/tests/testthat/test-tweakPedigree.R index 0b542e2e..eb00908a 100644 --- a/tests/testthat/test-tweakPedigree.R +++ b/tests/testthat/test-tweakPedigree.R @@ -2,7 +2,7 @@ test_that("makeTwins - Twins specified by IDs", { set.seed(1234) ped <- data.frame( - fam = c(1, 1, 2, 2), + famID = c(1, 1, 2, 2), ID = c(1, 2, 3, 4), gen = c(1, 1, 2, 2), dadID = c(NA, NA, 1, 1), @@ -11,7 +11,7 @@ test_that("makeTwins - Twins specified by IDs", { sex = c("M", "F", "M", "F") ) expected_result <- data.frame( - fam = c(1, 1, 2, 2), + famID = c(1, 1, 2, 2), ID = c(1, 2, 3, 4), gen = c(1, 1, 2, 2), dadID = c(NA, NA, 1, 1), @@ -25,7 +25,7 @@ test_that("makeTwins - Twins specified by IDs", { expect_equal(result, expected_result) # does it handle weird variable names? "fam" = "famID" - names(ped)[1] <- "famID" + names(ped)[1] <- "fam" result <- makeTwins(ped, ID_twin1 = 1, ID_twin2 = 2, verbose = TRUE) expect_equal(result, expected_result) @@ -41,7 +41,7 @@ test_that("makeTwins - Twins specified by generation", { ped <- simulatePedigree(kpc = kpc, Ngen = Ngen, sexR = sexR, marR = marR) # result <- makeTwins(ped, gen_twin = gen_twin) - expect_equal(names(result), c("fam", "ID", "gen", "dadID", "momID", "spID", "sex", "MZtwin")) + expect_equal(names(result), c("famID", "ID", "gen", "dadID", "momID", "spID", "sex", "MZtwin")) # do we have the same people? expect_equal(result$ID, ped$ID) # did it make one pair of twins? @@ -61,7 +61,7 @@ test_that("makeTwins - Twins specified by generation", { # Test for makeInbreeding function test_that("makeInbreeding - Inbred mates specified by IDs", { ped <- data.frame( - fam = c(1, 1, 2, 2), + famID = c(1, 1, 2, 2), ID = c(1, 2, 3, 4), gen = c(1, 1, 2, 2), dadID = c(NA, NA, 1, 1), @@ -70,7 +70,7 @@ test_that("makeInbreeding - Inbred mates specified by IDs", { sex = c("M", "F", "M", "F") ) expected_result <- data.frame( - fam = c(1, 1, 2, 2), + famID = c(1, 1, 2, 2), ID = c(1, 2, 3, 4), gen = c(1, 1, 2, 2), dadID = c(NA, NA, 1, 1), @@ -94,7 +94,7 @@ test_that("makeInbreeding - Inbred mates specified by generation and sibling", { ped <- simulatePedigree(kpc = kpc, Ngen = Ngen, sexR = sexR, marR = marR) # result <- makeInbreeding(ped, gen_inbred = gen_inbred, type_inbred = type_inbred) - expect_equal(names(result), c("fam", "ID", "gen", "dadID", "momID", "spID", "sex")) + expect_equal(names(result), c("famID", "ID", "gen", "dadID", "momID", "spID", "sex")) # do we have the same people? expect_equal(result$ID, ped$ID)