Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
5029937
add update rate
smasongarrison Apr 8, 2025
e22c933
allow verbose argument to be passed to standardizeColnames
smasongarrison Apr 8, 2025
a8c2b6c
refactor checkids
smasongarrison Apr 8, 2025
3326f75
docs
smasongarrison Apr 8, 2025
6019916
documentation
smasongarrison Apr 9, 2025
7f24910
Updatedocs
smasongarrison Apr 10, 2025
61fb620
renaming beta
smasongarrison Apr 10, 2025
2c0961c
Update .gitignore
smasongarrison Apr 10, 2025
edf2745
reorder coms
smasongarrison Apr 10, 2025
39fde6c
Delete benchmark_results.csv
smasongarrison Apr 10, 2025
015c32a
reorder coms
smasongarrison Apr 10, 2025
e2fc3cf
Merge branch 'wikitemplate_parser' of https://github.com/R-Computing-…
smasongarrison Apr 10, 2025
0850662
Update test-makeLinks.R
smasongarrison Apr 10, 2025
c5eabef
splitting links into legacy
smasongarrison Apr 10, 2025
8039f67
more tests and exploring option to write out all values
smasongarrison Apr 10, 2025
728d560
additional aliases
smasongarrison Apr 11, 2025
c48b8aa
remove package messaging
smasongarrison Apr 11, 2025
7d8dfe5
Merge branch 'main' into wikitemplate_parser
smasongarrison Apr 11, 2025
9641a04
Delete assignCoupleIds.Rd
smasongarrison Apr 11, 2025
9525061
allow NAs to behave
smasongarrison Apr 11, 2025
c18dbd7
allow NAs to behave
smasongarrison Apr 11, 2025
31f9a42
nicer plots
smasongarrison Apr 12, 2025
76e33bc
Merge branch 'wikitemplate_parser' of https://github.com/R-Computing-…
smasongarrison Apr 12, 2025
c7320eb
more tests
smasongarrison Apr 13, 2025
f45712f
aliases
smasongarrison Apr 13, 2025
d023b78
default now is direct method
smasongarrison Apr 14, 2025
b55ae50
Update readGedcom.R
smasongarrison Apr 14, 2025
2541415
Update readGedcom.R
smasongarrison Apr 14, 2025
65affa3
rename
smasongarrison Apr 14, 2025
1ee34ba
rename
smasongarrison Apr 14, 2025
294186b
Update readGedcomlegacy.R
smasongarrison Apr 14, 2025
8244984
updating tests
smasongarrison Apr 15, 2025
ff69473
rename
smasongarrison Apr 15, 2025
8039214
potential more direct methods
smasongarrison Apr 16, 2025
3293ebd
BENCHMARCKING
smasongarrison Apr 16, 2025
2ae4336
rolling out a direct(ish) method
smasongarrison Apr 17, 2025
76ebddc
styler
smasongarrison Apr 17, 2025
13ac783
oops
smasongarrison Apr 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ tests/testthat/Rplots.pdf
*.ASOIAF.ged
ASOIAF.ged
*.Rproj

benchmark_results.csv
.vscode/launch.json
dataRelatedPairs_new2.csv
data-raw/ASOIAF_040725.ged
dataRelatedPairs.csv
11 changes: 11 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

export(SimPed)
export(allGens)
export(assignCoupleIDs)
export(calcAllGens)
export(calcFamilySize)
export(calculateRelatedness)
export(checkIDs)
export(checkParentIDs)
Expand All @@ -17,6 +20,7 @@ export(famSizeCal)
export(fitComponentModel)
export(identifyComponentModel)
export(inferRelatedness)
export(insertEven)
export(makeInbreeding)
export(makeTwins)
export(parseTree)
Expand All @@ -30,14 +34,21 @@ export(ped2maternal)
export(ped2mit)
export(ped2paternal)
export(plotPedigree)
export(readGed)
export(readGedcom)
export(readWikifamilytree)
export(readgedcom)
export(recodeSex)
export(related_coef)
export(relatedness)
export(repairSex)
export(resample)
export(simulatePedigree)
export(sizeAllGens)
export(summariseFamilies)
export(summariseMatrilines)
export(summarisePatrilines)
export(summarisePedigrees)
export(summarizeFamilies)
export(summarizeMatrilines)
export(summarizePatrilines)
Expand Down
6 changes: 5 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@
* revived checkParents function to check for handling phantom parents and missing parents
* added tests for checkParents function
* added GoT analysis
* reduced complexity of com2links and summarizePedigree with the use of subfunctions
* reduced complexity of com2links, summarizePedigree, and checkIDs with the use of subfunctions
* allow verbose argument to be passed to standardizeColnames
* list SimPed and related_coef as aliases for functions
* harmonizing function names like calcFamilySize from famSizeCal
* implemented adjBeta function to evaluation alternative build method

# BGmisc 1.3.5.1
* Setting the default for the `sparse` argument in `ped2com()` to TRUE
Expand Down
15 changes: 10 additions & 5 deletions R/buildPedigree.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@
...) {
# Check ped/data.fram
if (!inherits(ped, "data.frame")) {
stop("ped should be a data.frame or inherit to a data.frame")}
stop("ped should be a data.frame or inherit to a data.frame")

Check warning on line 91 in R/buildPedigree.R

View check run for this annotation

Codecov / codecov/patch

R/buildPedigree.R#L91

Added line #L91 was not covered by tests
}
# Handle adjacent argument
adjacent <- match.arg(tolower(adjacent)[1],
choices = c(
Expand Down Expand Up @@ -182,8 +183,10 @@
momID = "momID", dadID = "dadID",
matID = "matID", ...) {
# Call to wrapper function
.ped2id(ped = ped, personID = personID, momID = momID,
dadID = dadID, famID = matID, type = "mothers")
.ped2id(
ped = ped, personID = personID, momID = momID,
dadID = dadID, famID = matID, type = "mothers"
)
}

#' Add a paternal line ID variable to a pedigree
Expand All @@ -203,6 +206,8 @@
momID = "momID", dadID = "dadID",
patID = "patID", ...) {
# Call to wrapper function
.ped2id(ped = ped, personID = personID, momID = momID,
dadID = dadID, famID = patID, type = "fathers")
.ped2id(
ped = ped, personID = personID, momID = momID,
dadID = dadID, famID = patID, type = "fathers"
)
}
23 changes: 16 additions & 7 deletions R/calculateFamilySize.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#' @param marR Mating rate (numeric value ranging from 0 to 1).
#' @return Returns a vector containing the number of individuals in every generation.
#' @export
allGens <- function(kpc, Ngen, marR) {
calcAllGens <- function(kpc, Ngen, marR) {
# Check if the number of generations is valid
if (Ngen < 1) {
stop("The number of generations should be an integer greater or equal than 1")
Expand All @@ -23,14 +23,16 @@ allGens <- function(kpc, Ngen, marR) {
}
return(allGens)
}

#' @rdname calcAllGens
#' @export
allGens <- calcAllGens

#' sizeAllGens
#' An internal supporting function for \code{simulatePedigree}.
#' @inheritParams allGens
#' @inheritParams calcAllGens
#' @return Returns a vector including the number of individuals in every generation.

sizeAllGens <- function(kpc, Ngen, marR) {
calcFamilySizeByGen <- function(kpc, Ngen, marR) {
Nmid <- Ngen - 2
midGens <- numeric(length = Nmid)

Expand All @@ -46,14 +48,16 @@ sizeAllGens <- function(kpc, Ngen, marR) {
# print(allGens)
return(allGens)
}

#' @rdname calcFamilySizeByGen
#' @export
sizeAllGens <- calcFamilySizeByGen

#' famSizeCal
#' A function to calculate the total number of individuals in a pedigree given parameters. This is a supporting function for function \code{simulatePedigree}
#' @inheritParams allGens
#' @inheritParams calcAllGens
#' @return Returns a numeric value indicating the total pedigree size.
#' @export
famSizeCal <- function(kpc, Ngen, marR) {
calcFamilySize <- function(kpc, Ngen, marR) {
if (Ngen < 1) {
stop("The number of generations should be an integer greater than or equal to 1")
} else if (Ngen == 1) {
Expand All @@ -71,3 +75,8 @@ famSizeCal <- function(kpc, Ngen, marR) {
}
return(size)
}

#' @rdname calcFamilySize
#' @export
#'
famSizeCal <- calcFamilySize
161 changes: 79 additions & 82 deletions R/checkIDs.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#' @export
checkIDs <- function(ped, verbose = FALSE, repair = FALSE) {
# Standardize column names in the input dataframe
ped <- standardizeColnames(ped)
ped <- standardizeColnames(ped, verbose = verbose)

# Initialize a list to store validation results
validation_results <- list()
Expand All @@ -28,91 +28,15 @@
}

# Identify non-unique IDs
duplicated_ids <- ped$ID[duplicated(ped$ID) | duplicated(ped$ID, fromLast = TRUE)]

id_check <- checkIDuniqueness(ped = ped, verbose = verbose)

# Update the validation_results list
if (length(duplicated_ids) > 0) {
if (verbose) {
cat(paste0(length(duplicated_ids), " non-unique IDs found.\n"))
}
validation_results$all_unique_ids <- FALSE
validation_results$total_non_unique_ids <- length(duplicated_ids)
validation_results$non_unique_ids <- unique(duplicated_ids)
} else {
if (verbose) {
cat("All IDs are unique.\n")
}
validation_results$all_unique_ids <- TRUE
validation_results$total_non_unique_ids <- 0
validation_results$non_unique_ids <- NULL
}
if (verbose) {
cat("Step 2: Checking for within row duplicats...\n")
cat("Is own father?\n")
}
is_own_father <- ped$ID[ped$ID == ped$dadID & !is.na(ped$dadID)]
if (verbose) {
cat("Is own mother?\n")
}
is_own_mother <- ped$ID[ped$ID == ped$momID & !is.na(ped$momID)]
if (verbose) {
cat("Is mother father?\n")
}
duplicated_parents <- ped$ID[ped$dadID == ped$momID & !is.na(ped$dadID) & !is.na(ped$momID)]
row_check <- checkWithinRowDuplicates(ped = ped, verbose = verbose)

# get the total number of within row duplicates
validation_results$total_own_father <- length(is_own_father)
validation_results$total_own_mother <- length(is_own_mother)
validation_results$total_duplicated_parents <- length(duplicated_parents)
validation_results$total_within_row_duplicates <- sum(length(is_own_father), length(is_own_mother), length(duplicated_parents))
# Update the validation_results list
validation_results <- c(id_check, row_check)

if (validation_results$total_within_row_duplicates > 0) {
if (verbose) {
cat(paste0(
validation_results$total_within_row_duplicates,
" within row duplicates found.\n"
))
}
validation_results$within_row_duplicates <- TRUE
if (validation_results$total_own_father > 0) {
validation_results$is_own_father_ids <- unique(is_own_father)
if (verbose) {
cat(paste0(
validation_results$total_own_father,
" individuals are their own fathers.\n"
))
}
}
if (validation_results$total_own_mother > 0) {
validation_results$is_own_mother_ids <- unique(is_own_mother)
if (verbose) {
cat(paste0(
validation_results$total_own_mother,
" individuals are their own mothers.\n"
))
}
}
if (validation_results$total_duplicated_parents > 0) {
validation_results$duplicated_parents_ids <- unique(duplicated_parents)
if (verbose) {
cat(paste0(
validation_results$total_duplicated_parents,
" individuals have the same mother and father.\n"
))
}
}
} else {
if (verbose) {
cat("No within row duplicates found.\n")
}
validation_results$within_row_duplicates <- FALSE
validation_results$total_within_row_duplicates <- 0
validation_results$is_own_father_ids <- NULL
validation_results$is_own_mother_ids <- NULL
validation_results$duplicated_parents_ids <- NULL
}
if (verbose) {
cat("Validation Results:\n")
print(validation_results)
Expand Down Expand Up @@ -163,9 +87,82 @@
#' Repair Missing IDs
#'
#' This function repairs missing IDs in a pedigree.
#' @param ped A pedigree object
#' @param verbose A logical indicating whether to print progress messages
#' @inheritParams checkIDs
#' @return A corrected pedigree
repairIDs <- function(ped, verbose = FALSE) {
checkIDs(ped = ped, verbose = verbose, repair = TRUE)
}

#' Check for duplicated individual IDs
#'
#' This function checks for duplicated individual IDs in a pedigree.
#'
#' @inheritParams checkIDs
#' @return A list containing the results of the check
#'
checkIDuniqueness <- function(ped, verbose = FALSE) {
# Identify non-unique IDs

duplicated_ids <- ped$ID[duplicated(ped$ID) | duplicated(ped$ID, fromLast = TRUE)]

if (verbose) {
if (length(duplicated_ids) > 0) {
cat(length(duplicated_ids), " non-unique IDs found.\n")
} else {
cat("All IDs are unique.\n")
}
}

# Update the validation_results list
list(
all_unique_ids = length(duplicated_ids) == 0,
total_non_unique_ids = length(duplicated_ids),
non_unique_ids = if (length(duplicated_ids) > 0) unique(duplicated_ids) else NULL
)
}



#' Check for within-row duplicates (self-parents, same mom/dad)
#'
#' This function checks for within-row duplicates in a pedigree.
#'
#' @inheritParams checkIDs
#' @return A list containing the results of the check
#'
checkWithinRowDuplicates <- function(ped, verbose = FALSE) {
# is the individual their own father or mother?
is_own_father <- ped$ID[ped$ID == ped$dadID & !is.na(ped$dadID)]
is_own_mother <- ped$ID[ped$ID == ped$momID & !is.na(ped$momID)]

# is mother and father the same?
duplicated_parents <- ped$ID[
ped$dadID == ped$momID &
!is.na(ped$dadID) & !is.na(ped$momID)
]

# get the total number of within row duplicates
total <- length(is_own_father) + length(is_own_mother) + length(duplicated_parents)

if (verbose) {
if (total > 0) {
cat(total, " within row duplicates found.\n")
if (length(is_own_father) > 0) cat(length(is_own_father), " individuals are their own fathers.\n")
if (length(is_own_mother) > 0) cat(length(is_own_mother), " individuals are their own mothers.\n")
if (length(duplicated_parents) > 0) cat(length(duplicated_parents), " individuals have the same mother and father.\n")

Check warning on line 152 in R/checkIDs.R

View check run for this annotation

Codecov / codecov/patch

R/checkIDs.R#L149-L152

Added lines #L149 - L152 were not covered by tests
} else {
cat("No within row duplicates found.\n")
}
}
# Update the validation_results list
list(
total_own_father = length(is_own_father),
total_own_mother = length(is_own_mother),
total_duplicated_parents = length(duplicated_parents),
total_within_row_duplicates = total,
within_row_duplicates = total > 0,
is_own_father_ids = if (length(is_own_father) > 0) unique(is_own_father) else NULL,
is_own_mother_ids = if (length(is_own_mother) > 0) unique(is_own_mother) else NULL,
duplicated_parents_ids = if (length(duplicated_parents) > 0) unique(duplicated_parents) else NULL
)
}
Loading
Loading