From 8a1856b715a6eaaec3d5debd4a147197b69a86ff Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Mon, 5 May 2025 08:59:49 -0400 Subject: [PATCH 1/4] Update README.md Update README.Rmd Signed-off-by: Mason Garrison --- README.Rmd | 3 +++ README.md | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/README.Rmd b/README.Rmd index a6d5b2c..67e16c2 100644 --- a/README.Rmd +++ b/README.Rmd @@ -21,7 +21,10 @@ knitr::opts_chunk$set( [![R package version](https://www.r-pkg.org/badges/version/discord)](https://cran.r-project.org/package=discord) [![Package downloads](https://cranlogs.r-pkg.org/badges/grand-total/discord)](https://cran.r-project.org/package=discord)
[![R-CMD-check](https://github.com/R-Computing-Lab/discord/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/R-Computing-Lab/discord/actions/workflows/R-CMD-check.yaml) +[![Dev Main branch](https://github.com/R-Computing-Lab/discord/actions/workflows/R-CMD-dev_check.yaml/badge.svg)](https://github.com/R-Computing-Lab/discord/actions/workflows/R-CMD-dev_check.yaml) +[![Codecov test coverage](https://codecov.io/gh/R-Computing-Lab/discord/graph/badge.svg)](https://app.codecov.io/gh/R-Computing-Lab/discord) ![License](https://img.shields.io/badge/License-GPL_v3-blue.svg) + The goal of discord is to provide functions for discordant kinship diff --git a/README.md b/README.md index ffc931b..5c39c97 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,12 @@ version](https://www.r-pkg.org/badges/version/discord)](https://cran.r-project.o [![Package downloads](https://cranlogs.r-pkg.org/badges/grand-total/discord)](https://cran.r-project.org/package=discord)
[![R-CMD-check](https://github.com/R-Computing-Lab/discord/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/R-Computing-Lab/discord/actions/workflows/R-CMD-check.yaml) +[![Dev Main +branch](https://github.com/R-Computing-Lab/discord/actions/workflows/R-CMD-dev_check.yaml/badge.svg)](https://github.com/R-Computing-Lab/discord/actions/workflows/R-CMD-dev_check.yaml) +[![Codecov test +coverage](https://codecov.io/gh/R-Computing-Lab/discord/graph/badge.svg)](https://app.codecov.io/gh/R-Computing-Lab/discord) ![License](https://img.shields.io/badge/License-GPL_v3-blue.svg) + The goal of discord is to provide functions for discordant kinship From 97e460bbed3d94aafa66053af8ee63d7e11a7ba0 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Mon, 19 May 2025 00:45:16 -0400 Subject: [PATCH 2/4] styling added unique --- NEWS.md | 1 + R/func_discord_data.R | 8 +- tests/testthat/test-discord_between.R | 177 +++++++++--------- tests/testthat/test-discord_regression.R | 87 +++++---- .../test-discord_regression_arguments.R | 46 ++--- vignettes/Power.Rmd | 33 ++-- 6 files changed, 181 insertions(+), 171 deletions(-) diff --git a/NEWS.md b/NEWS.md index b20376e..a52d691 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,7 @@ * Vectorizing `discord_data()` to improve performance. * Adding tests to ensure comparability between optimized and non-optimized versions of `discord_data()`. * Adding `discord_between_model()` to get the between-family model +* Added unique filter for `discord_data()` to ensure that the data is not duplicated. # discord 1.2.3.1 * More mild improvements to documentation diff --git a/R/func_discord_data.R b/R/func_discord_data.R index 7cc6da0..c18493b 100644 --- a/R/func_discord_data.R +++ b/R/func_discord_data.R @@ -47,7 +47,7 @@ discord_data <- function(data, fast = TRUE, ...) { if (fast) { - discord_data_fast( + unique(discord_data_fast( data = data, outcome = outcome, id = id, @@ -58,9 +58,9 @@ discord_data <- function(data, predictors = predictors, coding_method = coding_method, ... - ) + )) } else { - discord_data_ram_optimized( + unique(discord_data_ram_optimized( data = data, outcome = outcome, id = id, @@ -71,7 +71,7 @@ discord_data <- function(data, predictors = predictors, coding_method = coding_method, ... - ) + )) } } diff --git a/tests/testthat/test-discord_between.R b/tests/testthat/test-discord_between.R index f724ffd..0512a19 100644 --- a/tests/testthat/test-discord_between.R +++ b/tests/testthat/test-discord_between.R @@ -14,24 +14,24 @@ signif_threshold <- 0.05 test_that("monozygotic significant between-model is as expected", { set.seed(18) results_fast <- discord_between_model(mz_signif, - outcome = "y1", - predictors = "y2", - id = "id", - sex = NULL, - race = NULL, - pair_identifiers = c("_1", "_2"), - fast = TRUE + outcome = "y1", + predictors = "y2", + id = "id", + sex = NULL, + race = NULL, + pair_identifiers = c("_1", "_2"), + fast = TRUE ) set.seed(18) results_ram <- discord_between_model(mz_signif, - outcome = "y1", - predictors = "y2", - id = "id", - sex = NULL, - race = NULL, - pair_identifiers = c("_1", "_2"), - fast = FALSE + outcome = "y1", + predictors = "y2", + id = "id", + sex = NULL, + race = NULL, + pair_identifiers = c("_1", "_2"), + fast = FALSE ) expect_equal(results_fast, results_ram, tolerance = 0.005) @@ -43,23 +43,23 @@ test_that("monozygotic significant between-model is as expected", { test_that("monozygotic nonsignificant between-model is as expected", { set.seed(18) results_fast <- discord_between_model(mz_nonsignif, - outcome = "y1", - predictors = "y2", - id = "id", - sex = NULL, - race = NULL, - pair_identifiers = c("_1", "_2"), - fast = TRUE + outcome = "y1", + predictors = "y2", + id = "id", + sex = NULL, + race = NULL, + pair_identifiers = c("_1", "_2"), + fast = TRUE ) set.seed(18) results_ram <- discord_between_model(mz_nonsignif, - outcome = "y1", - predictors = "y2", - id = "id", - sex = NULL, - race = NULL, - pair_identifiers = c("_1", "_2"), - fast = FALSE + outcome = "y1", + predictors = "y2", + id = "id", + sex = NULL, + race = NULL, + pair_identifiers = c("_1", "_2"), + fast = FALSE ) expect_equal(results_fast, results_ram, tolerance = 0.005) expect_lt(object = get_p_value_between(results_fast), expected = signif_threshold) @@ -69,104 +69,103 @@ test_that("monozygotic nonsignificant between-model is as expected", { test_that("dizygotic significant between-model is as expected", { set.seed(18) results_fast <- discord_between_model(dz_signif, - outcome = "y1", - predictors = "y2", - id = "id", - sex = NULL, - race = NULL, - pair_identifiers = c("_1", "_2"), - fast = TRUE + outcome = "y1", + predictors = "y2", + id = "id", + sex = NULL, + race = NULL, + pair_identifiers = c("_1", "_2"), + fast = TRUE ) set.seed(18) results_ram <- discord_between_model(dz_signif, - outcome = "y1", - predictors = "y2", - id = "id", - sex = NULL, - race = NULL, - pair_identifiers = c("_1", "_2"), - fast = FALSE + outcome = "y1", + predictors = "y2", + id = "id", + sex = NULL, + race = NULL, + pair_identifiers = c("_1", "_2"), + fast = FALSE ) expect_lt(object = get_p_value_between(results_fast), expected = signif_threshold) expect_lt(object = get_p_value_between(results_ram), expected = signif_threshold) expect_equal(results_fast, results_ram, tolerance = 0.005) - - }) +}) test_that("dizygotic nonsignificant between-model is as expected", { set.seed(18) results_fast <- discord_between_model(dz_nonsignif, - outcome = "y1", - predictors = "y2", - id = "id", - sex = NULL, - race = NULL, - pair_identifiers = c("_1", "_2"), - fast = TRUE + outcome = "y1", + predictors = "y2", + id = "id", + sex = NULL, + race = NULL, + pair_identifiers = c("_1", "_2"), + fast = TRUE ) set.seed(18) results_ram <- discord_between_model(dz_nonsignif, - outcome = "y1", - predictors = "y2", - id = "id", - sex = NULL, - race = NULL, - pair_identifiers = c("_1", "_2"), - fast = FALSE + outcome = "y1", + predictors = "y2", + id = "id", + sex = NULL, + race = NULL, + pair_identifiers = c("_1", "_2"), + fast = FALSE ) expect_lt(object = get_p_value_between(results_fast), expected = signif_threshold) expect_lt(object = get_p_value_between(results_ram), expected = signif_threshold) expect_equal(results_fast, results_ram, tolerance = 0.005) - }) +}) test_that("half siblings significant between-model is as expected", { set.seed(18) results_fast <- discord_between_model(half_sibs_signif, - outcome = "y1", - predictors = "y2", - id = "id", - sex = NULL, - race = NULL, - pair_identifiers = c("_1", "_2"), - fast = TRUE + outcome = "y1", + predictors = "y2", + id = "id", + sex = NULL, + race = NULL, + pair_identifiers = c("_1", "_2"), + fast = TRUE ) set.seed(18) results_ram <- discord_between_model(half_sibs_signif, - outcome = "y1", - predictors = "y2", - id = "id", - sex = NULL, - race = NULL, - pair_identifiers = c("_1", "_2"), - fast = FALSE + outcome = "y1", + predictors = "y2", + id = "id", + sex = NULL, + race = NULL, + pair_identifiers = c("_1", "_2"), + fast = FALSE ) expect_lt(object = get_p_value_between(results_fast), expected = signif_threshold) expect_lt(object = get_p_value_between(results_ram), expected = signif_threshold) expect_equal(results_fast, results_ram, tolerance = 0.005) - }) +}) test_that("half siblings nonsignificant between-model is as expected", { set.seed(18) results_fast <- discord_between_model(half_sibs_nonsignif, - outcome = "y1", - predictors = "y2", - id = "id", - sex = NULL, - race = NULL, - pair_identifiers = c("_1", "_2"), - fast = TRUE + outcome = "y1", + predictors = "y2", + id = "id", + sex = NULL, + race = NULL, + pair_identifiers = c("_1", "_2"), + fast = TRUE ) set.seed(18) results_ram <- discord_between_model(half_sibs_nonsignif, - outcome = "y1", - predictors = "y2", - id = "id", - sex = NULL, - race = NULL, - pair_identifiers = c("_1", "_2"), - fast = FALSE + outcome = "y1", + predictors = "y2", + id = "id", + sex = NULL, + race = NULL, + pair_identifiers = c("_1", "_2"), + fast = FALSE ) expect_lt(object = get_p_value_between(results_fast), expected = signif_threshold) expect_lt(object = get_p_value_between(results_ram), expected = signif_threshold) expect_equal(results_fast, results_ram, tolerance = 0.005) - }) +}) diff --git a/tests/testthat/test-discord_regression.R b/tests/testthat/test-discord_regression.R index e52e24e..dffa0d2 100644 --- a/tests/testthat/test-discord_regression.R +++ b/tests/testthat/test-discord_regression.R @@ -20,18 +20,17 @@ test_that("monozygotic significant is as expected", { fast = TRUE ) results_ram <- discord_regression(mz_signif, - outcome = "y1", - predictors = "y2", - id = "id", - sex = NULL, - race = NULL, - pair_identifiers = c("_1", "_2"), - fast = FALSE + outcome = "y1", + predictors = "y2", + id = "id", + sex = NULL, + race = NULL, + pair_identifiers = c("_1", "_2"), + fast = FALSE ) expect_lt(object = get_p_value(results_fast), expected = signif_threshold) expect_lt(object = get_p_value(results_ram), expected = signif_threshold) expect_equal(get_p_value(results_fast), get_p_value(results_ram), tolerance = 0.005) - }) test_that("monozygotic nonsignificant is as expected", { @@ -46,13 +45,13 @@ test_that("monozygotic nonsignificant is as expected", { fast = TRUE ) results_ram <- discord_regression(mz_nonsignif, - outcome = "y1", - predictors = "y2", - id = "id", - sex = NULL, - race = NULL, - pair_identifiers = c("_1", "_2"), - fast = FALSE + outcome = "y1", + predictors = "y2", + id = "id", + sex = NULL, + race = NULL, + pair_identifiers = c("_1", "_2"), + fast = FALSE ) expect_gt(object = get_p_value(results_fast), expected = signif_threshold) expect_gt(object = get_p_value(results_ram), expected = signif_threshold) @@ -71,13 +70,13 @@ test_that("dizygotic significant is as expected", { fast = TRUE ) results_ram <- discord_regression(dz_signif, - outcome = "y1", - predictors = "y2", - id = "id", - sex = NULL, - race = NULL, - pair_identifiers = c("_1", "_2"), - fast = FALSE + outcome = "y1", + predictors = "y2", + id = "id", + sex = NULL, + race = NULL, + pair_identifiers = c("_1", "_2"), + fast = FALSE ) expect_lt(object = get_p_value(results_fast), expected = signif_threshold) expect_lt(object = get_p_value(results_ram), expected = signif_threshold) @@ -97,13 +96,13 @@ test_that("dizygotic nonsignificant is as expected", { ) results_ram <- discord_regression(dz_nonsignif, - outcome = "y1", - predictors = "y2", - id = "id", - sex = NULL, - race = NULL, - pair_identifiers = c("_1", "_2"), - fast = FALSE + outcome = "y1", + predictors = "y2", + id = "id", + sex = NULL, + race = NULL, + pair_identifiers = c("_1", "_2"), + fast = FALSE ) expect_gt(object = get_p_value(results_fast), expected = signif_threshold) @@ -124,13 +123,13 @@ test_that("half siblings significant is as expected", { fast = TRUE ) results_ram <- discord_regression(half_sibs_signif, - outcome = "y1", - predictors = "y2", - id = "id", - sex = NULL, - race = NULL, - pair_identifiers = c("_1", "_2"), - fast = FALSE + outcome = "y1", + predictors = "y2", + id = "id", + sex = NULL, + race = NULL, + pair_identifiers = c("_1", "_2"), + fast = FALSE ) expect_lt(object = get_p_value(results_fast), expected = signif_threshold) @@ -147,16 +146,16 @@ test_that("half siblings nonsignificant is as expected", { sex = NULL, race = NULL, pair_identifiers = c("_1", "_2"), - fast= TRUE + fast = TRUE ) results_ram <- discord_regression(half_sibs_nonsignif, - outcome = "y1", - predictors = "y2", - id = "id", - sex = NULL, - race = NULL, - pair_identifiers = c("_1", "_2"), - fast= FALSE + outcome = "y1", + predictors = "y2", + id = "id", + sex = NULL, + race = NULL, + pair_identifiers = c("_1", "_2"), + fast = FALSE ) expect_gt(object = get_p_value(results_fast), expected = signif_threshold) diff --git a/tests/testthat/test-discord_regression_arguments.R b/tests/testthat/test-discord_regression_arguments.R index 0b7d071..4094820 100644 --- a/tests/testthat/test-discord_regression_arguments.R +++ b/tests/testthat/test-discord_regression_arguments.R @@ -41,9 +41,9 @@ test_that("flu_2008 ~ edu_2008 + ses_2008 + race + sex", { set.seed(18) new_ram <- discord_regression(uniqueExtendedIDs, - outcome = "flu_2008", - predictors = c("edu_2008", "ses_2008"), - fast = FALSE + outcome = "flu_2008", + predictors = c("edu_2008", "ses_2008"), + fast = FALSE ) new_ram <- summarize_results(new_ram) @@ -82,16 +82,16 @@ test_that("flu_2008 ~ edu_2008 + ses_2008 + race", { id = "extended_id", predictors = c("edu_2008", "ses_2008"), sex = NULL, - fast= TRUE + fast = TRUE ) set.seed(18) new_ram <- discord_regression(uniqueExtendedIDs, - outcome = "flu_2008", - id = "extended_id", - predictors = c("edu_2008", "ses_2008"), - sex = NULL, - fast= FALSE + outcome = "flu_2008", + id = "extended_id", + predictors = c("edu_2008", "ses_2008"), + sex = NULL, + fast = FALSE ) new_fast <- summarize_results(new_fast) new_ram <- summarize_results(new_ram) @@ -134,12 +134,12 @@ test_that("flu_2008 ~ edu_2008 + ses_2008", { ) set.seed(18) new_ram <- discord_regression(uniqueExtendedIDs, - outcome = "flu_2008", - id = "extended_id", - predictors = c("edu_2008", "ses_2008"), - sex = NULL, - race = NULL, - fast = FALSE + outcome = "flu_2008", + id = "extended_id", + predictors = c("edu_2008", "ses_2008"), + sex = NULL, + race = NULL, + fast = FALSE ) new_fast <- summarize_results(new_fast) @@ -183,15 +183,15 @@ test_that("flu_2008 ~ edu_2008 + ses_2008 + race + sex", { ) set.seed(18) new_ram <- discord_regression(uniqueExtendedIDs, - outcome = "flu_2008", - predictors = "edu_2008", - fast = FALSE + outcome = "flu_2008", + predictors = "edu_2008", + fast = FALSE ) -new_fast <- summarize_results(new_fast) -new_ram <- summarize_results(new_ram) + new_fast <- summarize_results(new_fast) + new_ram <- summarize_results(new_ram) -expect_equal(control, new_fast, tolerance = 0.005) -expect_equal(control, new_ram, tolerance = 0.005) -expect_equal(new_fast, new_ram, tolerance = 0.005) + expect_equal(control, new_fast, tolerance = 0.005) + expect_equal(control, new_ram, tolerance = 0.005) + expect_equal(new_fast, new_ram, tolerance = 0.005) }) diff --git a/vignettes/Power.Rmd b/vignettes/Power.Rmd index befc235..a764ac9 100644 --- a/vignettes/Power.Rmd +++ b/vignettes/Power.Rmd @@ -95,7 +95,6 @@ conditions <- expand.grid( For each condition, we simulate 100 replications of fitting discordant sibling models. We have kept the number of trials low for demonstration purposes, but you can increase `n_trials` for more robust power estimates. ```{r} - set.seed(1492) # Set seed for reproducibility n_trials <- 100 FAST <- FALSE # Set to FALSE for slower, more detailed analysis @@ -206,15 +205,27 @@ facet_labels <- list( cov_e = c("0" = "No Cov_E", "1" = "With Cov_E") ) # Ensure factors with exact string levels -final_results$cov_a <- factor(final_results$cov_a, levels = c(0, 0.25), - labels = c("No Covariate A", - "Covariate A (0.25)")) -final_results$cov_c <- factor(final_results$cov_c, levels = c(0, 0.25), - labels = c("No Covariate C", - "Covariate C (0.25)")) -final_results$cov_e <- factor(final_results$cov_e, levels = c(0, 0.25), - labels = c("No Covariate E", - "Covariate E (0.25)")) +final_results$cov_a <- factor(final_results$cov_a, + levels = c(0, 0.25), + labels = c( + "No Covariate A", + "Covariate A (0.25)" + ) +) +final_results$cov_c <- factor(final_results$cov_c, + levels = c(0, 0.25), + labels = c( + "No Covariate C", + "Covariate C (0.25)" + ) +) +final_results$cov_e <- factor(final_results$cov_e, + levels = c(0, 0.25), + labels = c( + "No Covariate E", + "Covariate E (0.25)" + ) +) ``` @@ -266,7 +277,7 @@ p_noa <- ggplot(final_results_noa, aes( color = "Relatedness", fill = "Relatedness" ) + - theme_minimal() + + theme_minimal() + geom_text( x = 600, y = 0.55, label = "False Positive Rate", data = data.frame(cov_c = "No Covariate C", cov_e = "No Covariate E"), From ea56c1eec3864d2985ddbd91c2ee1c02f2f868c7 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Mon, 19 May 2025 11:15:19 -0400 Subject: [PATCH 3/4] add a test and rename internal function --- R/func_kinsim.R | 12 ++++++------ R/helpers_simulation.R | 13 +++++++------ man/{rmvn.Rd => dot-rmvn.Rd} | 6 +++--- tests/testthat/test-kinsim.R | 6 ++++-- 4 files changed, 20 insertions(+), 17 deletions(-) rename man/{rmvn.Rd => dot-rmvn.Rd} (92%) diff --git a/R/func_kinsim.R b/R/func_kinsim.R index c1fa83f..82488ad 100644 --- a/R/func_kinsim.R +++ b/R/func_kinsim.R @@ -137,7 +137,7 @@ kinsim <- function( sigma_a[4, 1] <- cov_a * r_all[i] sigma_a[3, 2] <- cov_a * r_all[i] sigma_a[2, 3] <- cov_a * r_all[i] - A.r <- rmvn(n, + A.r <- .rmvn(n, sigma = sigma_a ) @@ -154,7 +154,7 @@ kinsim <- function( sigma_c[4, 1] <- cov_c * 1 sigma_c[3, 2] <- cov_c * 1 sigma_c[2, 3] <- cov_c * 1 - C.r <- rmvn(n, + C.r <- .rmvn(n, sigma = sigma_c ) C.r[, 1:2] <- C.r[, 1:2] * sC[1] @@ -166,7 +166,7 @@ kinsim <- function( sigma_e[3, 1] <- cov_e sigma_e[2, 4] <- cov_e sigma_e[4, 2] <- cov_e - E.r <- rmvn(n, + E.r <- .rmvn(n, sigma = sigma_e ) E.r[, 1:2] <- E.r[, 1:2] * sE[1] @@ -225,7 +225,7 @@ kinsim <- function( sigma_a[3, 2] <- cov_a * r_val sigma_a[2, 3] <- cov_a * r_val - A_tmp <- rmvn(n_sub, sigma = sigma_a) + A_tmp <- .rmvn(n_sub, sigma = sigma_a) A.r[idx, 1:2] <- A_tmp[, 1:2] * sA[1] A.r[idx, 3:4] <- A_tmp[, 3:4] * sA[2] @@ -241,7 +241,7 @@ kinsim <- function( sigma_c[3, 2] <- cov_c * 1 sigma_c[2, 3] <- cov_c * 1 - C_tmp <- rmvn(n_sub, sigma = sigma_c) + C_tmp <- .rmvn(n_sub, sigma = sigma_c) C.r[idx, 1:2] <- C_tmp[, 1:2] * sC[1] C.r[idx, 3:4] <- C_tmp[, 3:4] * sC[2] @@ -252,7 +252,7 @@ kinsim <- function( sigma_e[2, 4] <- cov_e sigma_e[4, 2] <- cov_e - E_tmp <- rmvn(n_sub, sigma = sigma_e) + E_tmp <- .rmvn(n_sub, sigma = sigma_e) E.r[idx, 1:2] <- E_tmp[, 1:2] * sE[1] E.r[idx, 3:4] <- E_tmp[, 3:4] * sE[2] } diff --git a/R/helpers_simulation.R b/R/helpers_simulation.R index 8ed4be3..5b2fdac 100644 --- a/R/helpers_simulation.R +++ b/R/helpers_simulation.R @@ -8,7 +8,7 @@ #' @return Matrix of dimension \code{n × ncol(sigma)} containing random samples #' from the multivariate normal distribution. #' @keywords internal -rmvn <- function(n, sigma) { +.rmvn <- function(n, sigma) { Sh <- with( svd(sigma), v %*% diag(sqrt(d)) %*% t(u) @@ -89,16 +89,17 @@ kinsim_internal <- function( # Generate data for each relatedness group - for (i in 1:length(r)) { +# for (i in 1:length(r)) { + for (i in seq_along(r)) { n <- npergroup[i] # Generate correlated genetic components based on relatedness - A.r <- sA * rmvn(n, sigma = diag(2) + S2 * r[i]) + A.r <- sA * .rmvn(n, sigma = diag(2) + S2 * r[i]) # Generate shared environmental components (same for both members) # C.r <- stats::rnorm(n,sd = sC) # C.r <- cbind(C.r,C.r ) - C.r <- sC * rmvn(n, sigma = diag(2) + S2 * c_rel) + C.r <- sC * .rmvn(n, sigma = diag(2) + S2 * c_rel) # Generate non-shared environmental components (different for each member) E.r <- cbind( @@ -140,7 +141,7 @@ kinsim_internal <- function( # Generate genetic components for each unique relatedness value for (i in 1:length(unique_r)) { n <- length(r_vector[r_vector == unique_r[i]]) - A.rz <- sA * rmvn(n, sigma = diag(2) + S2 * unique_r[i]) + A.rz <- sA * .rmvn(n, sigma = diag(2) + S2 * unique_r[i]) data_vector$A.r1[data_vector$r_vector == unique_r[i]] <- A.rz[, 1] data_vector$A.r2[data_vector$r_vector == unique_r[i]] <- A.rz[, 2] } @@ -149,7 +150,7 @@ kinsim_internal <- function( data_vector$A.r1, data_vector$A.r2 ), ncol = 2) - C.r <- sC * rmvn(n, sigma = diag(2) + S2 * c_rel) + C.r <- sC * .rmvn(n, sigma = diag(2) + S2 * c_rel) E.r <- cbind( stats::rnorm(n, sd = sE), diff --git a/man/rmvn.Rd b/man/dot-rmvn.Rd similarity index 92% rename from man/rmvn.Rd rename to man/dot-rmvn.Rd index 2674aae..a0bd240 100644 --- a/man/rmvn.Rd +++ b/man/dot-rmvn.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/helpers_simulation.R -\name{rmvn} -\alias{rmvn} +\name{.rmvn} +\alias{.rmvn} \title{Generate Multivariate Normal Random Variates} \usage{ -rmvn(n, sigma) +.rmvn(n, sigma) } \arguments{ \item{n}{Integer. Number of samples to generate.} diff --git a/tests/testthat/test-kinsim.R b/tests/testthat/test-kinsim.R index eb5ae22..537ffb0 100644 --- a/tests/testthat/test-kinsim.R +++ b/tests/testthat/test-kinsim.R @@ -161,11 +161,13 @@ test_that("genetic correlation between variables is present when cov_a is non-ze expect_gt(cor_val, 0.1) # minimal expected correlation }) -test_that("kinsim handles r_vector input correctly", { +test_that("kinsim handles r_vector and c_vecttor input correctly", { r_vec <- rep(c(1, 0.5), each = 100) - df <- kinsim(r_vector = r_vec) + c_vector <- rep(1, 200) + df <- kinsim(r_vector = r_vec, c_vector = c_vector) expect_equal(nrow(df), 200) expect_equal(df$r, r_vec) + expect_equal(df$C1_1, df$C1_2) }) test_that("output has correct ID range", { From 97e7ee7d748f7f80be4672898b4951d2c1547ff9 Mon Sep 17 00:00:00 2001 From: Mason Garrison Date: Mon, 19 May 2025 11:32:16 -0400 Subject: [PATCH 4/4] Update test-discord_regression.R --- NEWS.md | 1 + R/func_discord_regression.R | 6 +- tests/testthat/test-discord_regression.R | 182 +++++++++++++++++++++++ 3 files changed, 188 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index a52d691..13a8d18 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,7 @@ * Adding tests to ensure comparability between optimized and non-optimized versions of `discord_data()`. * Adding `discord_between_model()` to get the between-family model * Added unique filter for `discord_data()` to ensure that the data is not duplicated. +* Added tests for categorical variables in `discord_data()`. # discord 1.2.3.1 * More mild improvements to documentation diff --git a/R/func_discord_regression.R b/R/func_discord_regression.R index f4e94d6..03d4459 100644 --- a/R/func_discord_regression.R +++ b/R/func_discord_regression.R @@ -28,8 +28,12 @@ discord_regression <- function(data, data_processed = FALSE, coding_method = "none", fast = TRUE) { + if( data_processed == TRUE & !is.data.frame(data) ) { + stop("data must be a data frame if data_processed is TRUE") + } + if( data_processed == FALSE){ check_discord_errors(data = data, id = id, sex = sex, race = race, pair_identifiers = pair_identifiers) - +} # if no demographics provided if (is.null(demographics)) { if (is.null(sex) & is.null(race)) { diff --git a/tests/testthat/test-discord_regression.R b/tests/testthat/test-discord_regression.R index dffa0d2..066fc3c 100644 --- a/tests/testthat/test-discord_regression.R +++ b/tests/testthat/test-discord_regression.R @@ -162,3 +162,185 @@ test_that("half siblings nonsignificant is as expected", { expect_gt(object = get_p_value(results_ram), expected = signif_threshold) expect_equal(get_p_value(results_fast), get_p_value(results_ram), tolerance = 0.005) }) + + +default_setup <- function() { + set.seed(2023) + library(NlsyLinks) + library(dplyr) + data(data_flu_ses) + link_pairs <- Links79PairExpanded %>% + filter(RelationshipPath == "Gen1Housemates" & RFull == 0.5) + df_link <- CreatePairLinksSingleEntered( + outcomeDataset = data_flu_ses, + linksPairDataset = link_pairs, + outcomeNames = c("S00_H40", "RACE", "SEX") + ) %>% + filter(!is.na(S00_H40_S1) & !is.na(S00_H40_S2)) %>% + mutate( + SEX_S1 = ifelse(SEX_S1 == 0, "MALE", "FEMALE"), + SEX_S2 = ifelse(SEX_S2 == 0, "MALE", "FEMALE"), + RACE_S1 = ifelse(RACE_S1 == 0, "NONMINORITY", "MINORITY"), + RACE_S2 = ifelse(RACE_S2 == 0, "NONMINORITY", "MINORITY") + ) %>% + filter(RACE_S1 == RACE_S2) %>% + group_by(ExtendedID) %>% + slice_sample() %>% + ungroup() + return(df_link) +} + +test_that("discord_data 'binary' coding excludes multi columns", { + df_link <- default_setup() + cd_bin <- discord_regression( + data = df_link, + outcome = "S00_H40", + sex = "SEX", + race = "RACE", + demographics = "both", + predictors = NULL, + pair_identifiers = c("_S1", "_S2"), + coding_method = "binary" + ) + expect_true("SEX_binarymatch" %in% names(cd_bin$model)) + expect_false("SEX_multimatch" %in% names(cd_bin$model)) + expect_true("RACE_binarymatch" %in% names(cd_bin$model)) + expect_false("RACE_multimatch" %in% names(cd_bin$model)) +}) + + + + +test_that("discord_data with sex coding returns expected columns and values", { + + set.seed(2023) + data(data_flu_ses) + + df_link <- default_setup() + + cat_sex <- discord_data( + data = df_link, + outcome = "S00_H40", + sex = "SEX", + race = "RACE", + demographics = "sex", + predictors = NULL, + pair_identifiers = c("_S1", "_S2"), + coding_method = "both" + ) + expect_true(all(cat_sex$SEX_multimatch %in% c("MALE", "FEMALE", "mixed"))) + expect_true(all(cat_sex$SEX_binarymatch %in% c(0, 1))) + + cat_sex_model <- discord_regression( + data = df_link, + outcome = "S00_H40", + sex = "SEX", + race = "RACE", + demographics = "sex", + predictors = NULL, + pair_identifiers = c("_S1", "_S2"), + coding_method = "binary" + ) + + expect_true("SEX_binarymatch" %in% names(cat_sex_model$model)) + expect_false("SEX_multimatch" %in% names(cat_sex_model$model)) + + expect_false("RACE_binarymatch" %in% names(cat_sex_model$model)) + expect_false("RACE_multimatch" %in% names(cat_sex_model$model)) + +}) + +test_that("discord_data with race coding returns expected columns and values", { + set.seed(2023) + data(data_flu_ses) + df_link <- default_setup() + # reuse df_link from above + cat_race <- discord_data( + data = df_link, + outcome = "S00_H40", + sex = "SEX", + race = "RACE", + demographics = "race", + predictors = NULL, + pair_identifiers = c("_S1", "_S2"), + coding_method = "both" + ) + cat_race_model <- discord_regression( + data = df_link, + outcome = "S00_H40", + sex = "SEX", + race = "RACE", + demographics = "race", + predictors = NULL, + pair_identifiers = c("_S1", "_S2"), + coding_method = "binary" + ) + + + expect_true(all(cat_race$RACE_binarymatch %in% c(0, 1))) + + # sample the distinct levels + expect_setequal(unique(cat_race$RACE_multimatch), + c("NONMINORITY", "MINORITY")) + + expect_false("SEX_multimatch" %in% names(cat_race_model$model)) + expect_false("SEX_binarymatch" %in% names(cat_race_model$model)) + expect_true("RACE_binarymatch" %in% names(cat_race_model$model)) + expect_false("RACE_multimatch" %in% names(cat_race_model$model)) +}) + +test_that("discord_data 'both' coding returns binary and multi columns", { + df_link <- default_setup() + cd <- discord_data( + data = df_link, + outcome = "S00_H40", + sex = "SEX", + race = "RACE", + demographics = "both", + predictors = NULL, + pair_identifiers = c("_S1", "_S2"), + coding_method = "both" + ) + + cd_model <- discord_regression( + data = cd, + data_processed = TRUE, + outcome = "S00_H40", + sex = "SEX", + race = "RACE", + demographics = "both", + predictors = NULL, + pair_identifiers = c("_S1", "_S2"), + coding_method = "multi" + ) + expect_true(all(c( "SEX_multimatch", + "RACE_multimatch") %in% + names(cd_model$model))) + + expect_false("SEX_binarymatch" %in% names(cd_model$model)) + expect_false("RACE_binarymatch" %in% names(cd_model$model)) +}) + +test_that("discord_regression returns a model with coefficients", { + set.seed(2023) + data(data_flu_ses) + df_link <- default_setup() + + dr_mod <- discord_regression( + data = df_link, + outcome = "S00_H40", + sex = "SEX", + race = "RACE", + demographics = "both", + predictors = NULL, + pair_identifiers = c("_S1", "_S2"), + coding_method = "multi" + ) + + # class check + expect_s3_class(dr_mod, c("lm", "discord_regression")) + # coefficient table is not empty + coefs <- broom::tidy(dr_mod) + expect_true(nrow(coefs) > 0) +}) +