From 8a1856b715a6eaaec3d5debd4a147197b69a86ff Mon Sep 17 00:00:00 2001
From: Mason Garrison <garrissm@wfu.edu>
Date: Mon, 5 May 2025 08:59:49 -0400
Subject: [PATCH 1/4] Update README.md

Update README.Rmd

Signed-off-by: Mason Garrison <garrissm@wfu.edu>
---
 README.Rmd | 3 +++
 README.md  | 5 +++++
 2 files changed, 8 insertions(+)
diff --git a/README.Rmd b/README.Rmd
index a6d5b2c..67e16c2 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -21,7 +21,10 @@ knitr::opts_chunk$set(
 [![R package version](https://www.r-pkg.org/badges/version/discord)](https://cran.r-project.org/package=discord)
 [![Package downloads](https://cranlogs.r-pkg.org/badges/grand-total/discord)](https://cran.r-project.org/package=discord)</br>
 [![R-CMD-check](https://github.com/R-Computing-Lab/discord/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/R-Computing-Lab/discord/actions/workflows/R-CMD-check.yaml)
+[![Dev Main branch](https://github.com/R-Computing-Lab/discord/actions/workflows/R-CMD-dev_check.yaml/badge.svg)](https://github.com/R-Computing-Lab/discord/actions/workflows/R-CMD-dev_check.yaml)
+[![Codecov test coverage](https://codecov.io/gh/R-Computing-Lab/discord/graph/badge.svg)](https://app.codecov.io/gh/R-Computing-Lab/discord)
 ![License](https://img.shields.io/badge/License-GPL_v3-blue.svg)
+
 <!-- badges: end -->
 
 The goal of discord is to provide functions for discordant kinship
diff --git a/README.md b/README.md
index ffc931b..5c39c97 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,12 @@ version](https://www.r-pkg.org/badges/version/discord)](https://cran.r-project.o
 [![Package
 downloads](https://cranlogs.r-pkg.org/badges/grand-total/discord)](https://cran.r-project.org/package=discord)</br>
 [![R-CMD-check](https://github.com/R-Computing-Lab/discord/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/R-Computing-Lab/discord/actions/workflows/R-CMD-check.yaml)
+[![Dev Main
+branch](https://github.com/R-Computing-Lab/discord/actions/workflows/R-CMD-dev_check.yaml/badge.svg)](https://github.com/R-Computing-Lab/discord/actions/workflows/R-CMD-dev_check.yaml)
+[![Codecov test
+coverage](https://codecov.io/gh/R-Computing-Lab/discord/graph/badge.svg)](https://app.codecov.io/gh/R-Computing-Lab/discord)
 ![License](https://img.shields.io/badge/License-GPL_v3-blue.svg)
+
 <!-- badges: end -->
 
 The goal of discord is to provide functions for discordant kinship

From 97e460bbed3d94aafa66053af8ee63d7e11a7ba0 Mon Sep 17 00:00:00 2001
From: Mason Garrison <garrissm@wfu.edu>
Date: Mon, 19 May 2025 00:45:16 -0400
Subject: [PATCH 2/4] styling

added unique
---
 NEWS.md                                       |   1 +
 R/func_discord_data.R                         |   8 +-
 tests/testthat/test-discord_between.R         | 177 +++++++++---------
 tests/testthat/test-discord_regression.R      |  87 +++++----
 .../test-discord_regression_arguments.R       |  46 ++---
 vignettes/Power.Rmd                           |  33 ++--
 6 files changed, 181 insertions(+), 171 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index b20376e..a52d691 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -6,6 +6,7 @@
 * Vectorizing `discord_data()` to improve performance.
 * Adding tests to ensure comparability between optimized and non-optimized versions of `discord_data()`.
 * Adding `discord_between_model()` to get the between-family model
+* Added unique filter for `discord_data()` to ensure that the data is not duplicated.
 
 # discord 1.2.3.1
 * More mild improvements to documentation
diff --git a/R/func_discord_data.R b/R/func_discord_data.R
index 7cc6da0..c18493b 100644
--- a/R/func_discord_data.R
+++ b/R/func_discord_data.R
@@ -47,7 +47,7 @@ discord_data <- function(data,
                          fast = TRUE,
                          ...) {
   if (fast) {
-    discord_data_fast(
+    unique(discord_data_fast(
       data = data,
       outcome = outcome,
       id = id,
@@ -58,9 +58,9 @@ discord_data <- function(data,
       predictors = predictors,
       coding_method = coding_method,
       ...
-    )
+    ))
   } else {
-    discord_data_ram_optimized(
+    unique(discord_data_ram_optimized(
       data = data,
       outcome = outcome,
       id = id,
@@ -71,7 +71,7 @@ discord_data <- function(data,
       predictors = predictors,
       coding_method = coding_method,
       ...
-    )
+    ))
   }
 }
 
diff --git a/tests/testthat/test-discord_between.R b/tests/testthat/test-discord_between.R
index f724ffd..0512a19 100644
--- a/tests/testthat/test-discord_between.R
+++ b/tests/testthat/test-discord_between.R
@@ -14,24 +14,24 @@ signif_threshold <- 0.05
 test_that("monozygotic significant between-model is as expected", {
   set.seed(18)
   results_fast <- discord_between_model(mz_signif,
-                                        outcome = "y1",
-                                        predictors = "y2",
-                                        id = "id",
-                                        sex = NULL,
-                                        race = NULL,
-                                        pair_identifiers = c("_1", "_2"),
-                                        fast = TRUE
+    outcome = "y1",
+    predictors = "y2",
+    id = "id",
+    sex = NULL,
+    race = NULL,
+    pair_identifiers = c("_1", "_2"),
+    fast = TRUE
   )
 
   set.seed(18)
   results_ram <- discord_between_model(mz_signif,
-                                       outcome = "y1",
-                                       predictors = "y2",
-                                       id = "id",
-                                       sex = NULL,
-                                       race = NULL,
-                                       pair_identifiers = c("_1", "_2"),
-                                       fast = FALSE
+    outcome = "y1",
+    predictors = "y2",
+    id = "id",
+    sex = NULL,
+    race = NULL,
+    pair_identifiers = c("_1", "_2"),
+    fast = FALSE
   )
 
   expect_equal(results_fast, results_ram, tolerance = 0.005)
@@ -43,23 +43,23 @@ test_that("monozygotic significant between-model is as expected", {
 test_that("monozygotic nonsignificant between-model is as expected", {
   set.seed(18)
   results_fast <- discord_between_model(mz_nonsignif,
-                                        outcome = "y1",
-                                        predictors = "y2",
-                                        id = "id",
-                                        sex = NULL,
-                                        race = NULL,
-                                        pair_identifiers = c("_1", "_2"),
-                                        fast = TRUE
+    outcome = "y1",
+    predictors = "y2",
+    id = "id",
+    sex = NULL,
+    race = NULL,
+    pair_identifiers = c("_1", "_2"),
+    fast = TRUE
   )
   set.seed(18)
   results_ram <- discord_between_model(mz_nonsignif,
-                                       outcome = "y1",
-                                       predictors = "y2",
-                                       id = "id",
-                                       sex = NULL,
-                                       race = NULL,
-                                       pair_identifiers = c("_1", "_2"),
-                                       fast = FALSE
+    outcome = "y1",
+    predictors = "y2",
+    id = "id",
+    sex = NULL,
+    race = NULL,
+    pair_identifiers = c("_1", "_2"),
+    fast = FALSE
   )
   expect_equal(results_fast, results_ram, tolerance = 0.005)
   expect_lt(object = get_p_value_between(results_fast), expected = signif_threshold)
@@ -69,104 +69,103 @@ test_that("monozygotic nonsignificant between-model is as expected", {
 test_that("dizygotic significant between-model is as expected", {
   set.seed(18)
   results_fast <- discord_between_model(dz_signif,
-                                        outcome = "y1",
-                                        predictors = "y2",
-                                        id = "id",
-                                        sex = NULL,
-                                        race = NULL,
-                                        pair_identifiers = c("_1", "_2"),
-                                        fast = TRUE
+    outcome = "y1",
+    predictors = "y2",
+    id = "id",
+    sex = NULL,
+    race = NULL,
+    pair_identifiers = c("_1", "_2"),
+    fast = TRUE
   )
   set.seed(18)
   results_ram <- discord_between_model(dz_signif,
-                                       outcome = "y1",
-                                       predictors = "y2",
-                                       id = "id",
-                                       sex = NULL,
-                                       race = NULL,
-                                       pair_identifiers = c("_1", "_2"),
-                                       fast = FALSE
+    outcome = "y1",
+    predictors = "y2",
+    id = "id",
+    sex = NULL,
+    race = NULL,
+    pair_identifiers = c("_1", "_2"),
+    fast = FALSE
   )
   expect_lt(object = get_p_value_between(results_fast), expected = signif_threshold)
   expect_lt(object = get_p_value_between(results_ram), expected = signif_threshold)
   expect_equal(results_fast, results_ram, tolerance = 0.005)
-
-  })
+})
 
 test_that("dizygotic nonsignificant between-model is as expected", {
   set.seed(18)
   results_fast <- discord_between_model(dz_nonsignif,
-                                        outcome = "y1",
-                                        predictors = "y2",
-                                        id = "id",
-                                        sex = NULL,
-                                        race = NULL,
-                                        pair_identifiers = c("_1", "_2"),
-                                        fast = TRUE
+    outcome = "y1",
+    predictors = "y2",
+    id = "id",
+    sex = NULL,
+    race = NULL,
+    pair_identifiers = c("_1", "_2"),
+    fast = TRUE
   )
   set.seed(18)
   results_ram <- discord_between_model(dz_nonsignif,
-                                       outcome = "y1",
-                                       predictors = "y2",
-                                       id = "id",
-                                       sex = NULL,
-                                       race = NULL,
-                                       pair_identifiers = c("_1", "_2"),
-                                       fast = FALSE
+    outcome = "y1",
+    predictors = "y2",
+    id = "id",
+    sex = NULL,
+    race = NULL,
+    pair_identifiers = c("_1", "_2"),
+    fast = FALSE
   )
   expect_lt(object = get_p_value_between(results_fast), expected = signif_threshold)
   expect_lt(object = get_p_value_between(results_ram), expected = signif_threshold)
   expect_equal(results_fast, results_ram, tolerance = 0.005)
-  })
+})
 
 test_that("half siblings significant between-model is as expected", {
   set.seed(18)
   results_fast <- discord_between_model(half_sibs_signif,
-                                        outcome = "y1",
-                                        predictors = "y2",
-                                        id = "id",
-                                        sex = NULL,
-                                        race = NULL,
-                                        pair_identifiers = c("_1", "_2"),
-                                        fast = TRUE
+    outcome = "y1",
+    predictors = "y2",
+    id = "id",
+    sex = NULL,
+    race = NULL,
+    pair_identifiers = c("_1", "_2"),
+    fast = TRUE
   )
   set.seed(18)
   results_ram <- discord_between_model(half_sibs_signif,
-                                       outcome = "y1",
-                                       predictors = "y2",
-                                       id = "id",
-                                       sex = NULL,
-                                       race = NULL,
-                                       pair_identifiers = c("_1", "_2"),
-                                       fast = FALSE
+    outcome = "y1",
+    predictors = "y2",
+    id = "id",
+    sex = NULL,
+    race = NULL,
+    pair_identifiers = c("_1", "_2"),
+    fast = FALSE
   )
   expect_lt(object = get_p_value_between(results_fast), expected = signif_threshold)
   expect_lt(object = get_p_value_between(results_ram), expected = signif_threshold)
   expect_equal(results_fast, results_ram, tolerance = 0.005)
-  })
+})
 
 test_that("half siblings nonsignificant between-model is as expected", {
   set.seed(18)
   results_fast <- discord_between_model(half_sibs_nonsignif,
-                                        outcome = "y1",
-                                        predictors = "y2",
-                                        id = "id",
-                                        sex = NULL,
-                                        race = NULL,
-                                        pair_identifiers = c("_1", "_2"),
-                                        fast = TRUE
+    outcome = "y1",
+    predictors = "y2",
+    id = "id",
+    sex = NULL,
+    race = NULL,
+    pair_identifiers = c("_1", "_2"),
+    fast = TRUE
   )
   set.seed(18)
   results_ram <- discord_between_model(half_sibs_nonsignif,
-                                       outcome = "y1",
-                                       predictors = "y2",
-                                       id = "id",
-                                       sex = NULL,
-                                       race = NULL,
-                                       pair_identifiers = c("_1", "_2"),
-                                       fast = FALSE
+    outcome = "y1",
+    predictors = "y2",
+    id = "id",
+    sex = NULL,
+    race = NULL,
+    pair_identifiers = c("_1", "_2"),
+    fast = FALSE
   )
   expect_lt(object = get_p_value_between(results_fast), expected = signif_threshold)
   expect_lt(object = get_p_value_between(results_ram), expected = signif_threshold)
   expect_equal(results_fast, results_ram, tolerance = 0.005)
-  })
+})
diff --git a/tests/testthat/test-discord_regression.R b/tests/testthat/test-discord_regression.R
index e52e24e..dffa0d2 100644
--- a/tests/testthat/test-discord_regression.R
+++ b/tests/testthat/test-discord_regression.R
@@ -20,18 +20,17 @@ test_that("monozygotic significant is as expected", {
     fast = TRUE
   )
   results_ram <- discord_regression(mz_signif,
-                                     outcome = "y1",
-                                     predictors = "y2",
-                                     id = "id",
-                                     sex = NULL,
-                                     race = NULL,
-                                     pair_identifiers = c("_1", "_2"),
-                                     fast = FALSE
+    outcome = "y1",
+    predictors = "y2",
+    id = "id",
+    sex = NULL,
+    race = NULL,
+    pair_identifiers = c("_1", "_2"),
+    fast = FALSE
   )
   expect_lt(object = get_p_value(results_fast), expected = signif_threshold)
   expect_lt(object = get_p_value(results_ram), expected = signif_threshold)
   expect_equal(get_p_value(results_fast), get_p_value(results_ram), tolerance = 0.005)
-
 })
 
 test_that("monozygotic nonsignificant is as expected", {
@@ -46,13 +45,13 @@ test_that("monozygotic nonsignificant is as expected", {
     fast = TRUE
   )
   results_ram <- discord_regression(mz_nonsignif,
-                                     outcome = "y1",
-                                     predictors = "y2",
-                                     id = "id",
-                                     sex = NULL,
-                                     race = NULL,
-                                     pair_identifiers = c("_1", "_2"),
-                                     fast = FALSE
+    outcome = "y1",
+    predictors = "y2",
+    id = "id",
+    sex = NULL,
+    race = NULL,
+    pair_identifiers = c("_1", "_2"),
+    fast = FALSE
   )
   expect_gt(object = get_p_value(results_fast), expected = signif_threshold)
   expect_gt(object = get_p_value(results_ram), expected = signif_threshold)
@@ -71,13 +70,13 @@ test_that("dizygotic significant is as expected", {
     fast = TRUE
   )
   results_ram <- discord_regression(dz_signif,
-                                     outcome = "y1",
-                                     predictors = "y2",
-                                     id = "id",
-                                     sex = NULL,
-                                     race = NULL,
-                                     pair_identifiers = c("_1", "_2"),
-                                     fast = FALSE
+    outcome = "y1",
+    predictors = "y2",
+    id = "id",
+    sex = NULL,
+    race = NULL,
+    pair_identifiers = c("_1", "_2"),
+    fast = FALSE
   )
   expect_lt(object = get_p_value(results_fast), expected = signif_threshold)
   expect_lt(object = get_p_value(results_ram), expected = signif_threshold)
@@ -97,13 +96,13 @@ test_that("dizygotic nonsignificant is as expected", {
   )
 
   results_ram <- discord_regression(dz_nonsignif,
-                                     outcome = "y1",
-                                     predictors = "y2",
-                                     id = "id",
-                                     sex = NULL,
-                                     race = NULL,
-                                     pair_identifiers = c("_1", "_2"),
-                                     fast = FALSE
+    outcome = "y1",
+    predictors = "y2",
+    id = "id",
+    sex = NULL,
+    race = NULL,
+    pair_identifiers = c("_1", "_2"),
+    fast = FALSE
   )
 
   expect_gt(object = get_p_value(results_fast), expected = signif_threshold)
@@ -124,13 +123,13 @@ test_that("half siblings significant is as expected", {
     fast = TRUE
   )
   results_ram <- discord_regression(half_sibs_signif,
-                                     outcome = "y1",
-                                     predictors = "y2",
-                                     id = "id",
-                                     sex = NULL,
-                                     race = NULL,
-                                     pair_identifiers = c("_1", "_2"),
-                                     fast = FALSE
+    outcome = "y1",
+    predictors = "y2",
+    id = "id",
+    sex = NULL,
+    race = NULL,
+    pair_identifiers = c("_1", "_2"),
+    fast = FALSE
   )
 
   expect_lt(object = get_p_value(results_fast), expected = signif_threshold)
@@ -147,16 +146,16 @@ test_that("half siblings nonsignificant is as expected", {
     sex = NULL,
     race = NULL,
     pair_identifiers = c("_1", "_2"),
-    fast= TRUE
+    fast = TRUE
   )
   results_ram <- discord_regression(half_sibs_nonsignif,
-                                     outcome = "y1",
-                                     predictors = "y2",
-                                     id = "id",
-                                     sex = NULL,
-                                     race = NULL,
-                                     pair_identifiers = c("_1", "_2"),
-                                     fast= FALSE
+    outcome = "y1",
+    predictors = "y2",
+    id = "id",
+    sex = NULL,
+    race = NULL,
+    pair_identifiers = c("_1", "_2"),
+    fast = FALSE
   )
 
   expect_gt(object = get_p_value(results_fast), expected = signif_threshold)
diff --git a/tests/testthat/test-discord_regression_arguments.R b/tests/testthat/test-discord_regression_arguments.R
index 0b7d071..4094820 100644
--- a/tests/testthat/test-discord_regression_arguments.R
+++ b/tests/testthat/test-discord_regression_arguments.R
@@ -41,9 +41,9 @@ test_that("flu_2008 ~ edu_2008 + ses_2008 + race + sex", {
 
   set.seed(18)
   new_ram <- discord_regression(uniqueExtendedIDs,
-                            outcome = "flu_2008",
-                            predictors = c("edu_2008", "ses_2008"),
-                            fast = FALSE
+    outcome = "flu_2008",
+    predictors = c("edu_2008", "ses_2008"),
+    fast = FALSE
   )
 
   new_ram <- summarize_results(new_ram)
@@ -82,16 +82,16 @@ test_that("flu_2008 ~ edu_2008 + ses_2008 + race", {
     id = "extended_id",
     predictors = c("edu_2008", "ses_2008"),
     sex = NULL,
-    fast= TRUE
+    fast = TRUE
   )
 
   set.seed(18)
   new_ram <- discord_regression(uniqueExtendedIDs,
-                                 outcome = "flu_2008",
-                                 id = "extended_id",
-                                 predictors = c("edu_2008", "ses_2008"),
-                                 sex = NULL,
-                                 fast= FALSE
+    outcome = "flu_2008",
+    id = "extended_id",
+    predictors = c("edu_2008", "ses_2008"),
+    sex = NULL,
+    fast = FALSE
   )
   new_fast <- summarize_results(new_fast)
   new_ram <- summarize_results(new_ram)
@@ -134,12 +134,12 @@ test_that("flu_2008 ~ edu_2008 + ses_2008", {
   )
   set.seed(18)
   new_ram <- discord_regression(uniqueExtendedIDs,
-                                 outcome = "flu_2008",
-                                 id = "extended_id",
-                                 predictors = c("edu_2008", "ses_2008"),
-                                 sex = NULL,
-                                 race = NULL,
-                                 fast = FALSE
+    outcome = "flu_2008",
+    id = "extended_id",
+    predictors = c("edu_2008", "ses_2008"),
+    sex = NULL,
+    race = NULL,
+    fast = FALSE
   )
 
   new_fast <- summarize_results(new_fast)
@@ -183,15 +183,15 @@ test_that("flu_2008 ~ edu_2008 + ses_2008 + race + sex", {
   )
   set.seed(18)
   new_ram <- discord_regression(uniqueExtendedIDs,
-                                 outcome = "flu_2008",
-                                 predictors = "edu_2008",
-  fast = FALSE
+    outcome = "flu_2008",
+    predictors = "edu_2008",
+    fast = FALSE
   )
 
-new_fast <- summarize_results(new_fast)
-new_ram <- summarize_results(new_ram)
+  new_fast <- summarize_results(new_fast)
+  new_ram <- summarize_results(new_ram)
 
-expect_equal(control, new_fast, tolerance = 0.005)
-expect_equal(control, new_ram, tolerance = 0.005)
-expect_equal(new_fast, new_ram, tolerance = 0.005)
+  expect_equal(control, new_fast, tolerance = 0.005)
+  expect_equal(control, new_ram, tolerance = 0.005)
+  expect_equal(new_fast, new_ram, tolerance = 0.005)
 })
diff --git a/vignettes/Power.Rmd b/vignettes/Power.Rmd
index befc235..a764ac9 100644
--- a/vignettes/Power.Rmd
+++ b/vignettes/Power.Rmd
@@ -95,7 +95,6 @@ conditions <- expand.grid(
 For each condition, we simulate 100 replications of fitting discordant sibling models. We have kept the number of trials low for demonstration purposes, but you can increase `n_trials` for more robust power estimates. 
 
 ```{r}
-
 set.seed(1492) # Set seed for reproducibility
 n_trials <- 100
 FAST <- FALSE # Set to FALSE for slower, more detailed analysis
@@ -206,15 +205,27 @@ facet_labels <- list(
   cov_e = c("0" = "No Cov_E", "1" = "With Cov_E")
 )
 # Ensure factors with exact string levels
-final_results$cov_a <- factor(final_results$cov_a, levels = c(0, 0.25), 
-                              labels = c("No Covariate A",
-                                         "Covariate A (0.25)"))
-final_results$cov_c <- factor(final_results$cov_c, levels = c(0, 0.25), 
-                              labels = c("No Covariate C",
-                                         "Covariate C (0.25)"))
-final_results$cov_e <- factor(final_results$cov_e, levels = c(0, 0.25), 
-                              labels = c("No Covariate E",
-                                         "Covariate E (0.25)"))
+final_results$cov_a <- factor(final_results$cov_a,
+  levels = c(0, 0.25),
+  labels = c(
+    "No Covariate A",
+    "Covariate A (0.25)"
+  )
+)
+final_results$cov_c <- factor(final_results$cov_c,
+  levels = c(0, 0.25),
+  labels = c(
+    "No Covariate C",
+    "Covariate C (0.25)"
+  )
+)
+final_results$cov_e <- factor(final_results$cov_e,
+  levels = c(0, 0.25),
+  labels = c(
+    "No Covariate E",
+    "Covariate E (0.25)"
+  )
+)
 ```
 
 
@@ -266,7 +277,7 @@ p_noa <- ggplot(final_results_noa, aes(
     color = "Relatedness",
     fill = "Relatedness"
   ) +
-  theme_minimal() + 
+  theme_minimal() +
   geom_text(
     x = 600, y = 0.55, label = "False Positive Rate",
     data = data.frame(cov_c = "No Covariate C", cov_e = "No Covariate E"),

From ea56c1eec3864d2985ddbd91c2ee1c02f2f868c7 Mon Sep 17 00:00:00 2001
From: Mason Garrison <garrissm@wfu.edu>
Date: Mon, 19 May 2025 11:15:19 -0400
Subject: [PATCH 3/4] add a test and rename internal function

---
 R/func_kinsim.R              | 12 ++++++------
 R/helpers_simulation.R       | 13 +++++++------
 man/{rmvn.Rd => dot-rmvn.Rd} |  6 +++---
 tests/testthat/test-kinsim.R |  6 ++++--
 4 files changed, 20 insertions(+), 17 deletions(-)
 rename man/{rmvn.Rd => dot-rmvn.Rd} (92%)

diff --git a/R/func_kinsim.R b/R/func_kinsim.R
index c1fa83f..82488ad 100644
--- a/R/func_kinsim.R
+++ b/R/func_kinsim.R
@@ -137,7 +137,7 @@ kinsim <- function(
       sigma_a[4, 1] <- cov_a * r_all[i]
       sigma_a[3, 2] <- cov_a * r_all[i]
       sigma_a[2, 3] <- cov_a * r_all[i]
-      A.r <- rmvn(n,
+      A.r <- .rmvn(n,
         sigma = sigma_a
       )
 
@@ -154,7 +154,7 @@ kinsim <- function(
       sigma_c[4, 1] <- cov_c * 1
       sigma_c[3, 2] <- cov_c * 1
       sigma_c[2, 3] <- cov_c * 1
-      C.r <- rmvn(n,
+      C.r <- .rmvn(n,
         sigma = sigma_c
       )
       C.r[, 1:2] <- C.r[, 1:2] * sC[1]
@@ -166,7 +166,7 @@ kinsim <- function(
       sigma_e[3, 1] <- cov_e
       sigma_e[2, 4] <- cov_e
       sigma_e[4, 2] <- cov_e
-      E.r <- rmvn(n,
+      E.r <- .rmvn(n,
         sigma = sigma_e
       )
       E.r[, 1:2] <- E.r[, 1:2] * sE[1]
@@ -225,7 +225,7 @@ kinsim <- function(
       sigma_a[3, 2] <- cov_a * r_val
       sigma_a[2, 3] <- cov_a * r_val
 
-      A_tmp <- rmvn(n_sub, sigma = sigma_a)
+      A_tmp <- .rmvn(n_sub, sigma = sigma_a)
 
       A.r[idx, 1:2] <- A_tmp[, 1:2] * sA[1]
       A.r[idx, 3:4] <- A_tmp[, 3:4] * sA[2]
@@ -241,7 +241,7 @@ kinsim <- function(
       sigma_c[3, 2] <- cov_c * 1
       sigma_c[2, 3] <- cov_c * 1
 
-      C_tmp <- rmvn(n_sub, sigma = sigma_c)
+      C_tmp <- .rmvn(n_sub, sigma = sigma_c)
       C.r[idx, 1:2] <- C_tmp[, 1:2] * sC[1]
       C.r[idx, 3:4] <- C_tmp[, 3:4] * sC[2]
 
@@ -252,7 +252,7 @@ kinsim <- function(
       sigma_e[2, 4] <- cov_e
       sigma_e[4, 2] <- cov_e
 
-      E_tmp <- rmvn(n_sub, sigma = sigma_e)
+      E_tmp <- .rmvn(n_sub, sigma = sigma_e)
       E.r[idx, 1:2] <- E_tmp[, 1:2] * sE[1]
       E.r[idx, 3:4] <- E_tmp[, 3:4] * sE[2]
     }
diff --git a/R/helpers_simulation.R b/R/helpers_simulation.R
index 8ed4be3..5b2fdac 100644
--- a/R/helpers_simulation.R
+++ b/R/helpers_simulation.R
@@ -8,7 +8,7 @@
 #' @return Matrix of dimension \code{n × ncol(sigma)} containing random samples
 #'   from the multivariate normal distribution.
 #' @keywords internal
-rmvn <- function(n, sigma) {
+.rmvn <- function(n, sigma) {
   Sh <- with(
     svd(sigma),
     v %*% diag(sqrt(d)) %*% t(u)
@@ -89,16 +89,17 @@ kinsim_internal <- function(
 
 
     # Generate data for each relatedness group
-    for (i in 1:length(r)) {
+#    for (i in 1:length(r)) {
+    for (i in seq_along(r)) {
       n <- npergroup[i]
 
       # Generate correlated genetic components based on relatedness
-      A.r <- sA * rmvn(n, sigma = diag(2) + S2 * r[i])
+      A.r <- sA * .rmvn(n, sigma = diag(2) + S2 * r[i])
 
       # Generate shared environmental components (same for both members)
       #     C.r <- stats::rnorm(n,sd = sC)
       #   C.r <- cbind(C.r,C.r )
-      C.r <- sC * rmvn(n, sigma = diag(2) + S2 * c_rel)
+      C.r <- sC * .rmvn(n, sigma = diag(2) + S2 * c_rel)
 
       # Generate non-shared environmental components (different for each member)
       E.r <- cbind(
@@ -140,7 +141,7 @@ kinsim_internal <- function(
     # Generate genetic components for each unique relatedness value
     for (i in 1:length(unique_r)) {
       n <- length(r_vector[r_vector == unique_r[i]])
-      A.rz <- sA * rmvn(n, sigma = diag(2) + S2 * unique_r[i])
+      A.rz <- sA * .rmvn(n, sigma = diag(2) + S2 * unique_r[i])
       data_vector$A.r1[data_vector$r_vector == unique_r[i]] <- A.rz[, 1]
       data_vector$A.r2[data_vector$r_vector == unique_r[i]] <- A.rz[, 2]
     }
@@ -149,7 +150,7 @@ kinsim_internal <- function(
       data_vector$A.r1,
       data_vector$A.r2
     ), ncol = 2)
-    C.r <- sC * rmvn(n, sigma = diag(2) + S2 * c_rel)
+    C.r <- sC * .rmvn(n, sigma = diag(2) + S2 * c_rel)
 
     E.r <- cbind(
       stats::rnorm(n, sd = sE),
diff --git a/man/rmvn.Rd b/man/dot-rmvn.Rd
similarity index 92%
rename from man/rmvn.Rd
rename to man/dot-rmvn.Rd
index 2674aae..a0bd240 100644
--- a/man/rmvn.Rd
+++ b/man/dot-rmvn.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/helpers_simulation.R
-\name{rmvn}
-\alias{rmvn}
+\name{.rmvn}
+\alias{.rmvn}
 \title{Generate Multivariate Normal Random Variates}
 \usage{
-rmvn(n, sigma)
+.rmvn(n, sigma)
 }
 \arguments{
 \item{n}{Integer. Number of samples to generate.}
diff --git a/tests/testthat/test-kinsim.R b/tests/testthat/test-kinsim.R
index eb5ae22..537ffb0 100644
--- a/tests/testthat/test-kinsim.R
+++ b/tests/testthat/test-kinsim.R
@@ -161,11 +161,13 @@ test_that("genetic correlation between variables is present when cov_a is non-ze
   expect_gt(cor_val, 0.1) # minimal expected correlation
 })
 
-test_that("kinsim handles r_vector input correctly", {
+test_that("kinsim handles r_vector and c_vecttor input correctly", {
   r_vec <- rep(c(1, 0.5), each = 100)
-  df <- kinsim(r_vector = r_vec)
+  c_vector <- rep(1, 200)
+  df <- kinsim(r_vector = r_vec, c_vector = c_vector)
   expect_equal(nrow(df), 200)
   expect_equal(df$r, r_vec)
+  expect_equal(df$C1_1, df$C1_2)
 })
 
 test_that("output has correct ID range", {

From 97e7ee7d748f7f80be4672898b4951d2c1547ff9 Mon Sep 17 00:00:00 2001
From: Mason Garrison <garrissm@wfu.edu>
Date: Mon, 19 May 2025 11:32:16 -0400
Subject: [PATCH 4/4] Update test-discord_regression.R

---
 NEWS.md                                  |   1 +
 R/func_discord_regression.R              |   6 +-
 tests/testthat/test-discord_regression.R | 182 +++++++++++++++++++++++
 3 files changed, 188 insertions(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index a52d691..13a8d18 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -7,6 +7,7 @@
 * Adding tests to ensure comparability between optimized and non-optimized versions of `discord_data()`.
 * Adding `discord_between_model()` to get the between-family model
 * Added unique filter for `discord_data()` to ensure that the data is not duplicated.
+* Added tests for categorical variables in `discord_data()`.
 
 # discord 1.2.3.1
 * More mild improvements to documentation
diff --git a/R/func_discord_regression.R b/R/func_discord_regression.R
index f4e94d6..03d4459 100644
--- a/R/func_discord_regression.R
+++ b/R/func_discord_regression.R
@@ -28,8 +28,12 @@ discord_regression <- function(data,
                                data_processed = FALSE,
                                coding_method = "none",
                                fast = TRUE) {
+ if( data_processed == TRUE & !is.data.frame(data) ) {
+    stop("data must be a data frame if data_processed is TRUE")
+ }
+  if( data_processed == FALSE){
   check_discord_errors(data = data, id = id, sex = sex, race = race, pair_identifiers = pair_identifiers)
-
+}
   # if no demographics provided
   if (is.null(demographics)) {
     if (is.null(sex) & is.null(race)) {
diff --git a/tests/testthat/test-discord_regression.R b/tests/testthat/test-discord_regression.R
index dffa0d2..066fc3c 100644
--- a/tests/testthat/test-discord_regression.R
+++ b/tests/testthat/test-discord_regression.R
@@ -162,3 +162,185 @@ test_that("half siblings nonsignificant is as expected", {
   expect_gt(object = get_p_value(results_ram), expected = signif_threshold)
   expect_equal(get_p_value(results_fast), get_p_value(results_ram), tolerance = 0.005)
 })
+
+
+default_setup <- function() {
+  set.seed(2023)
+  library(NlsyLinks)
+  library(dplyr)
+  data(data_flu_ses)
+  link_pairs <- Links79PairExpanded %>%
+    filter(RelationshipPath == "Gen1Housemates" & RFull == 0.5)
+  df_link <- CreatePairLinksSingleEntered(
+    outcomeDataset   = data_flu_ses,
+    linksPairDataset = link_pairs,
+    outcomeNames     = c("S00_H40", "RACE", "SEX")
+  ) %>%
+    filter(!is.na(S00_H40_S1) & !is.na(S00_H40_S2)) %>%
+    mutate(
+      SEX_S1  = ifelse(SEX_S1 == 0, "MALE", "FEMALE"),
+      SEX_S2  = ifelse(SEX_S2 == 0, "MALE", "FEMALE"),
+      RACE_S1 = ifelse(RACE_S1 == 0, "NONMINORITY", "MINORITY"),
+      RACE_S2 = ifelse(RACE_S2 == 0, "NONMINORITY", "MINORITY")
+    ) %>%
+    filter(RACE_S1 == RACE_S2) %>%
+    group_by(ExtendedID) %>%
+    slice_sample() %>%
+    ungroup()
+  return(df_link)
+}
+
+test_that("discord_data 'binary' coding excludes multi columns", {
+  df_link <- default_setup()
+  cd_bin <- discord_regression(
+    data             = df_link,
+    outcome          = "S00_H40",
+    sex              = "SEX",
+    race             = "RACE",
+    demographics     = "both",
+    predictors       = NULL,
+    pair_identifiers = c("_S1", "_S2"),
+    coding_method    = "binary"
+  )
+  expect_true("SEX_binarymatch" %in% names(cd_bin$model))
+  expect_false("SEX_multimatch" %in% names(cd_bin$model))
+  expect_true("RACE_binarymatch" %in% names(cd_bin$model))
+  expect_false("RACE_multimatch" %in% names(cd_bin$model))
+})
+
+
+
+
+test_that("discord_data with sex coding returns expected columns and values", {
+
+  set.seed(2023)
+  data(data_flu_ses)
+
+  df_link <- default_setup()
+
+  cat_sex <- discord_data(
+    data             = df_link,
+    outcome          = "S00_H40",
+    sex              = "SEX",
+    race             = "RACE",
+    demographics     = "sex",
+    predictors       = NULL,
+    pair_identifiers = c("_S1", "_S2"),
+    coding_method    = "both"
+  )
+  expect_true(all(cat_sex$SEX_multimatch %in% c("MALE", "FEMALE", "mixed")))
+  expect_true(all(cat_sex$SEX_binarymatch %in% c(0, 1)))
+
+  cat_sex_model <- discord_regression(
+    data             = df_link,
+    outcome          = "S00_H40",
+    sex              = "SEX",
+    race             = "RACE",
+    demographics     = "sex",
+    predictors       = NULL,
+    pair_identifiers = c("_S1", "_S2"),
+    coding_method    = "binary"
+  )
+
+  expect_true("SEX_binarymatch"  %in% names(cat_sex_model$model))
+  expect_false("SEX_multimatch"   %in% names(cat_sex_model$model))
+
+  expect_false("RACE_binarymatch" %in% names(cat_sex_model$model))
+  expect_false("RACE_multimatch" %in% names(cat_sex_model$model))
+
+})
+
+test_that("discord_data with race coding returns expected columns and values", {
+  set.seed(2023)
+  data(data_flu_ses)
+  df_link <- default_setup()
+  # reuse df_link from above
+  cat_race <- discord_data(
+    data             = df_link,
+    outcome          = "S00_H40",
+    sex              = "SEX",
+    race             = "RACE",
+    demographics     = "race",
+    predictors       = NULL,
+    pair_identifiers = c("_S1", "_S2"),
+    coding_method    = "both"
+  )
+  cat_race_model <- discord_regression(
+    data             = df_link,
+    outcome          = "S00_H40",
+    sex              = "SEX",
+    race             = "RACE",
+    demographics     = "race",
+    predictors       = NULL,
+    pair_identifiers = c("_S1", "_S2"),
+    coding_method    = "binary"
+  )
+
+
+  expect_true(all(cat_race$RACE_binarymatch %in% c(0, 1)))
+
+  # sample the distinct levels
+  expect_setequal(unique(cat_race$RACE_multimatch),
+                  c("NONMINORITY", "MINORITY"))
+
+  expect_false("SEX_multimatch" %in% names(cat_race_model$model))
+  expect_false("SEX_binarymatch" %in% names(cat_race_model$model))
+  expect_true("RACE_binarymatch" %in% names(cat_race_model$model))
+  expect_false("RACE_multimatch" %in% names(cat_race_model$model))
+})
+
+test_that("discord_data 'both' coding returns binary and multi columns", {
+  df_link <- default_setup()
+  cd <- discord_data(
+    data             = df_link,
+    outcome          = "S00_H40",
+    sex              = "SEX",
+    race             = "RACE",
+    demographics     = "both",
+    predictors       = NULL,
+    pair_identifiers = c("_S1", "_S2"),
+    coding_method    = "both"
+  )
+
+  cd_model <- discord_regression(
+    data             = cd,
+    data_processed = TRUE,
+    outcome          = "S00_H40",
+    sex              = "SEX",
+    race             = "RACE",
+    demographics     = "both",
+    predictors       = NULL,
+    pair_identifiers = c("_S1", "_S2"),
+    coding_method    = "multi"
+  )
+  expect_true(all(c( "SEX_multimatch",
+                    "RACE_multimatch") %in%
+                    names(cd_model$model)))
+
+  expect_false("SEX_binarymatch" %in% names(cd_model$model))
+  expect_false("RACE_binarymatch" %in% names(cd_model$model))
+})
+
+test_that("discord_regression returns a model with coefficients", {
+  set.seed(2023)
+  data(data_flu_ses)
+  df_link <- default_setup()
+
+  dr_mod <- discord_regression(
+    data             = df_link,
+    outcome          = "S00_H40",
+    sex              = "SEX",
+    race             = "RACE",
+    demographics     = "both",
+    predictors       = NULL,
+    pair_identifiers = c("_S1", "_S2"),
+    coding_method    = "multi"
+  )
+
+  # class check
+  expect_s3_class(dr_mod, c("lm", "discord_regression"))
+  # coefficient table is not empty
+  coefs <- broom::tidy(dr_mod)
+  expect_true(nrow(coefs) > 0)
+})
+