Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: BGmisc
Title: An R Package for Extended Behavior Genetics Analysis
Version: 1.3.6
Version: 1.4.0
Authors@R: c(
person("S. Mason", "Garrison", , "garrissm@wfu.edu", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-4804-6003")),
Expand Down
3 changes: 2 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# BGmisc 1.3.6
# BGmisc 1.4.0
* revived checkParents function to check for handling phantom parents and missing parents
* added tests for checkParents function
* added GoT analysis
Expand All @@ -8,6 +8,7 @@
* harmonizing function names like calcFamilySize from famSizeCal
* implemented adjBeta function to evaluation alternative build method
* reorganize file names to be more consistent
* harmonized famID

# BGmisc 1.3.5.1
* Setting the default for the `sparse` argument in `ped2com()` to TRUE
Expand Down
5 changes: 3 additions & 2 deletions R/buildComponent.R
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ ped2com <- function(ped, component,
}

# Step 1: Construct parent-child adjacency matrix

## A. Resume from Checkpoint if Needed
if (resume && file.exists(checkpoint_files$parList) && file.exists(checkpoint_files$lens)) {
if (verbose) cat("Resuming: Loading parent-child adjacency data...\n")
Expand All @@ -139,7 +140,6 @@ ped2com <- function(ped, component,


## B. Resume loop from the next uncomputed index

if (verbose) cat("Computing parent-child adjacency matrix...\n")
# Construct sparse matrix
if (resume && file.exists(checkpoint_files$iss) && file.exists(checkpoint_files$jss)) { # fix to check actual
Expand Down Expand Up @@ -234,6 +234,8 @@ ped2com <- function(ped, component,
}
}
# --- Step 2: Compute Relatedness Matrix ---


if (resume && file.exists(checkpoint_files$r_checkpoint) && file.exists(checkpoint_files$gen_checkpoint) && file.exists(checkpoint_files$mtSum_checkpoint) && file.exists(checkpoint_files$newIsPar_checkpoint) &&
file.exists(checkpoint_files$count_checkpoint)
) {
Expand Down Expand Up @@ -320,7 +322,6 @@ ped2com <- function(ped, component,
}
}


if (component == "mitochondrial") {
r@x <- rep(1, length(r@x))
# Assign 1 to all nonzero elements for mitochondrial component
Expand Down
78 changes: 13 additions & 65 deletions R/checkParents.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#' @param addphantoms A logical flag indicating whether to add phantom parents for missing parent IDs.
#' @param parentswithoutrow A logical flag indicating whether to add parents without a row in the pedigree.
#'
#'
#' @return Depending on the value of `repair`, either a list containing validation results or a repaired dataframe is returned.
#' @examples
#' \dontrun{
Expand All @@ -25,17 +26,16 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE,
addphantoms = repair,
parentswithoutrow = repair) {
# Standardize column names in the input dataframe
ped_og <- ped
ped <- standardizeColnames(ped, verbose = verbose)


# Initialize a list to store validation results
validation_results <- list()

if (verbose) {
cat("Step 1: Checking for single parents...\n")
}


# Identify missing fathers and mothers
missing_fathers <- ped$ID[which(is.na(ped$dadID) & !is.na(ped$momID))]
missing_mothers <- ped$ID[which(!is.na(ped$dadID) & is.na(ped$momID))]
Expand Down Expand Up @@ -79,69 +79,17 @@ checkParentIDs <- function(ped, verbose = FALSE, repair = FALSE,
cat("Step 2: Determining the if moms are the same sex and dads are same sex\n")
}
# Determine modal sex values for moms and dads
mom_sexes <- ped$sex[ped$ID %in% ped$momID]
dad_sexes <- ped$sex[ped$ID %in% ped$dadID]

# Determine the most frequent sex for moms and dads
most_frequent_sex_mom <- names(sort(table(mom_sexes), decreasing = TRUE))[1]
most_frequent_sex_dad <- names(sort(table(dad_sexes), decreasing = TRUE))[1]

# are all moms/dads the same sex?
validation_results$mom_sex <- unique(mom_sexes)
validation_results$dad_sex <- unique(dad_sexes)

# Store the most frequent sex for moms and dads
if (is.numeric(ped$sex)) {
validation_results$female_var <- as.numeric(most_frequent_sex_mom)
validation_results$male_var <- as.numeric(most_frequent_sex_dad)
} else if (is.character(ped$sex) | is.factor(ped$sex)) {
validation_results$female_var <- most_frequent_sex_mom
validation_results$male_var <- most_frequent_sex_dad
} else {
print("You should never see this. If you do, then you have a problem with the data type of the sex variable")
}

# verbose
if (length(validation_results$mom_sex) == 1) {
if (verbose) {
cat(paste0(
"All moms are '",
validation_results$female_var,
"'.\n"
))
}
validation_results$female_moms <- TRUE
} else {
validation_results$female_moms <- FALSE
}

if (length(validation_results$dad_sex) == 1) {
if (verbose) {
cat(paste0(
"All dads are '",
validation_results$male_var,
"'.\n"
))
}
validation_results$male_dads <- TRUE
} else {
validation_results$male_dads <- FALSE
}
# Check for inconsistent sex
wrong_sex_moms <- ped$ID[which(ped$sex[ped$ID %in% ped$momID] != validation_results$female_var)]
wrong_sex_dads <- ped$ID[which(ped$sex[ped$ID %in% ped$dadID] != validation_results$male_var)]

validation_results$wrong_sex_moms <- wrong_sex_moms
validation_results$wrong_sex_dads <- wrong_sex_dads

if (verbose) {
if (length(wrong_sex_moms) > 0) {
cat("Some individuals listed as moms are not coded as", validation_results$female_var, "\n")
}
if (length(wrong_sex_dads) > 0) {
cat("Some individuals listed as dads are not coded as", validation_results$male_var, "\n")
}
}
mom_results <- checkParentSex(ped, parent_col = "momID", verbose = verbose)
dad_results <- checkParentSex(ped, parent_col = "dadID", verbose = verbose)

validation_results$mom_sex <- mom_results$unique_sexes
validation_results$dad_sex <- dad_results$unique_sexes
validation_results$female_var <- mom_results$modal_sex
validation_results$male_var <- dad_results$modal_sex
validation_results$wrong_sex_moms <- mom_results$inconsistent_parents
validation_results$wrong_sex_dads <- dad_results$inconsistent_parents
validation_results$female_moms <- mom_results$all_same_sex
validation_results$male_dads <- dad_results$all_same_sex

# Are any parents in both momID and dadID?
momdad <- intersect(ped$dadID, ped$momID)
Expand Down
137 changes: 105 additions & 32 deletions R/checkSex.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,10 @@
#' }
#' @export
#'
checkSex <- function(ped, code_male = NULL, code_female = NULL, verbose = FALSE, repair = FALSE) {
checkSex <- function(ped, code_male = NULL, code_female = NULL, verbose = FALSE, repair = FALSE,
momID = "momID",
dadID = "dadID"
) {
# Standardize column names in the input dataframe
ped <- standardizeColnames(ped, verbose = verbose)

Expand All @@ -53,36 +56,31 @@ checkSex <- function(ped, code_male = NULL, code_female = NULL, verbose = FALSE,
validation_results$sex_unique <- unique(ped$sex)
validation_results$sex_length <- length(unique(ped$sex))
if (verbose) {
cat(paste0(validation_results$sex_length, " unique values found.\n"))
cat(paste0("Unique values: ", paste0(validation_results$sex_unique, collapse = ", "), "\n"))
cat(validation_results$sex_length, " unique sex codes found: ", paste(validation_results$sex_unique, collapse = ", "), "\n")
}
# Are there multiple sexes/genders in the list of dads and moms?

table_sex_dad <- sort(table(ped$sex[ped$ID %in% ped$dadID]), decreasing = TRUE)
table_sex_mom <- sort(table(ped$sex[ped$ID %in% ped$momID]), decreasing = TRUE)

validation_results$all_sex_dad <- names(table_sex_dad)
validation_results$all_sex_mom <- names(table_sex_mom)

validation_results$most_frequent_sex_dad <- validation_results$all_sex_dad[1]
validation_results$most_frequent_sex_mom <- validation_results$all_sex_mom[1]
# Are there multiple sexes/genders in the list of dads and moms?

# List ids for dads that are female, moms that are male
if (length(validation_results$all_sex_dad) > 1) {
df_dads <- ped[ped$ID %in% ped$dadID, ]
validation_results$ID_female_dads <- df_dads$ID[df_dads$sex != validation_results$most_frequent_sex_dad]
validation_results$ID_child_female_dads <- ped$ID[ped$dadID %in% validation_results$ID_female_dads]
remove(df_dads)
}
if (length(validation_results$all_sex_mom) > 1) {
df_moms <- ped[ped$ID %in% ped$momID, ]
validation_results$ID_male_moms <- df_moms$ID[df_moms$sex != validation_results$most_frequent_sex_mom]
validation_results$ID_child_male_moms <- ped$ID[ped$momID %in% validation_results$ID_male_moms]
remove(df_moms)
}
dad_results <- checkParentSex(ped, parent_col = dadID, verbose = verbose)
mom_results <- checkParentSex(ped, parent_col = momID, verbose = verbose)

if (repair) {
if (verbose) {
validation_results$all_sex_dad <- dad_results$unique_sexes
validation_results$all_sex_mom <- mom_results$unique_sexes
validation_results$most_frequent_sex_dad <- dad_results$modal_sex
validation_results$most_frequent_sex_mom <- mom_results$modal_sex
validation_results$ID_female_dads <- dad_results$inconsistent_parents
validation_results$ID_child_female_dads <- dad_results$inconsistent_children
validation_results$ID_male_moms <- mom_results$inconsistent_parents
validation_results$ID_child_male_moms <- mom_results$inconsistent_children

if (repair==FALSE) {
if (verbose) { cat("Checks Made:\n")
print(validation_results) }
return(validation_results)
} else {
if (verbose==TRUE) {
cat("Step 2: Attempting to repair sex coding...\n")
}
# Initialize a list to track changes made during repair
Expand All @@ -107,13 +105,6 @@ checkSex <- function(ped, code_male = NULL, code_female = NULL, verbose = FALSE,
print(changes)
}
return(ped)
} else {
if (verbose) {
cat("Checks Made:\n")
print(validation_results)
}

return(validation_results)
}
}

Expand Down Expand Up @@ -193,3 +184,85 @@ recodeSex <- function(
}
return(ped)
}



#' Check Parental Role Sex Consistency
#'
#' Validates sex coding consistency for a given parental role (momID or dadID).
#'
#' @param ped Pedigree dataframe.
#' @param parent_col The column name for parent IDs ("momID" or "dadID").
#' @param sex_col The column name for sex coding. Default is "sex".
#' @param verbose Logical, whether to print messages.
#'
#' @return A list containing role, unique sex codes, modal sex, inconsistent parents, and linked children.
checkParentSex <- function(ped, parent_col, sex_col = "sex", verbose = FALSE) {


parent_ids <- ped[[parent_col]]
parent_rows <- ped[ped$ID %in% parent_ids, ]

if (nrow(parent_rows) == 0) {
if (verbose) cat(paste0("No individuals found in role: ", parent_col, "\n"))
return(list(
role = parent_col,
unique_sexes = NULL,
modal_sex = NA,
all_same_sex = NA,
inconsistent_parents = NULL,
inconsistent_children = NULL
))
}


# Are there multiple sexes/genders in the list of dads and moms?
parent_sexes <- parent_rows[[sex_col]]
unique_sexes <- unique(parent_sexes)

# are all moms/dads the same sex?
all_same_sex <- length(unique_sexes) == 1

# Store the most frequent sex for moms and dads
modal_sex <- names(sort(table(parent_sexes), decreasing = TRUE))[1]

# Type coercion based on ped$sex type
if (is.numeric(ped[[sex_col]])) {
modal_sex <- as.numeric(modal_sex)
}

# List ids for dads that are female, moms that are male
inconsistent_parents <- parent_rows$ID[parent_rows[[sex_col]] != modal_sex]

child_col <- parent_col
inconsistent_children <- ped$ID[ped[[child_col]] %in% inconsistent_parents]


if (verbose) {
cat(paste0("Role: ", parent_col, "\n"))
cat(length(unique_sexes), " unique sex codes found: ", paste(unique_sexes, collapse = ", "), "\n")
cat("Modal sex code: ", modal_sex, "\n")

if (all_same_sex) {
cat("All parents consistently coded.\n")
} else cat(length(inconsistent_parents), " parents have inconsistent sex coding.\n")
}

return(list(
role = parent_col,
unique_sexes = unique_sexes,
modal_sex = modal_sex,
all_same_sex = all_same_sex,
inconsistent_parents = inconsistent_parents,
inconsistent_children = inconsistent_children
))
}

#' Get the Modal Value of a Vector

.getModalValue <- function(x) {
if (length(na.omit(x)) == 0) return(NA)
freq_table <- sort(table(x), decreasing = TRUE)
modal <- names(freq_table)[1]
return(modal)
}
2 changes: 1 addition & 1 deletion R/cleanPedigree.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
standardizeColnames <- function(df, verbose = FALSE) {
# Internal mapping of standardized names to possible variants
mapping <- list(
"fam" = "^(?:fam(?:ily)?[\\.\\-_]?(?:id)?)",
"famID" = "^(?:fam(?:ily)?[\\.\\-_]?(?:id)?)",
"ID" = "^(?:i(?:d$|ndiv(?:idual)?)|p(?:erson)?[\\.\\-_]?id)",
"gen" = "^(?:gen(?:s|eration)?)",
"dadID" = "^(?:d(?:ad)?id|paid|fatherid|pid[\\.\\-_]?fath[er]*|sire)",
Expand Down
17 changes: 2 additions & 15 deletions R/constructAdjacency.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@
.adjDirect <- function(ped, component, saveable, resume,
save_path, verbose, lastComputed,
nr, checkpoint_files, update_rate,
parList, lens, save_rate_parlist, adjBeta_method,
parList, lens, save_rate_parlist,
...) {
# Loop through each individual in the pedigree
# Build the adjacency matrix for parent-child relationships
Expand Down Expand Up @@ -177,18 +177,6 @@
iss <- unlist(iss_list, use.names = FALSE)
jss <- unlist(jss_list, use.names = FALSE)

# list_of_adjacency <- .adjBeta(ped=ped,adjBeta_method=adjBeta_method,
# component = component,
# saveable = saveable, resume = resume,
# save_path = save_path, verbose = verbose,
# lastComputed = lastComputed, nr = nr,
# checkpoint_files = checkpoint_files,
# update_rate = update_rate,
# parList = parList,
# lens = lens, save_rate_parlist = save_rate_parlist,
# ...)

# return(list_of_adjacency)
} else if (component %in% c("mitochondrial")) {
mIDs <- stats::na.omit(data.frame(rID = ped$ID, cID = ped$momID))
iss <- c(mIDs$rID)
Expand Down Expand Up @@ -459,8 +447,7 @@ computeParentAdjacency <- function(ped, component,

"indexed" = {
# Garrison version
.adjIndexed(
ped = ped,
.adjIndexed(ped = ped,
component = component,
saveable = saveable,
resume = resume,
Expand Down
4 changes: 2 additions & 2 deletions R/tweakPedigree.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
makeTwins <- function(ped, ID_twin1 = NA_integer_, ID_twin2 = NA_integer_, gen_twin = 2, verbose = FALSE) {
# Check if the ped is the same format as the output of simulatePedigree
if (paste0(colnames(ped), collapse = "") != paste0(c(
"fam", "ID", "gen",
"famID", "ID", "gen",
"dadID", "momID", "spID", "sex"
), collapse = "")) {
ped <- standardizeColnames(ped, verbose = verbose)
Expand Down Expand Up @@ -128,7 +128,7 @@ makeInbreeding <- function(ped,
if (paste0(colnames(ped),
collapse = ""
) != paste0(
c("fam", "ID", "gen", "dadID", "momID", "spID", "sex"),
c("famID", "ID", "gen", "dadID", "momID", "spID", "sex"),
collapse = ""
)) {
ped <- standardizeColnames(ped, verbose = verbose)
Expand Down
Loading
Loading