From c474a720f7bddf691efc4b30f1d8bd4e82bfa280 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 16 Apr 2026 07:58:43 +0000
Subject: [PATCH 01/16] Initial plan


From ed796ce46bd9793ca3e10c911b3ca9e1fb04c0a4 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 16 Apr 2026 09:13:38 +0000
Subject: [PATCH 02/16] Add summaries for Fligner, Levene, and Shapiro-Wilk
 diagnostics

Agent-Logs-Url: https://github.com/d-morrison/rme/sessions/7e0b4f05-0243-4783-b92c-ac0a48263e14

Co-authored-by: d-morrison <2474437+d-morrison@users.noreply.github.com>
---
 .../_sec_linreg_diagnostics.qmd               | 138 ++++++++++++++++++
 1 file changed, 138 insertions(+)

diff --git a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
index 3543712f0..87e5c91f8 100644
--- a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
+++ b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
@@ -911,6 +911,144 @@ All three plots show the same data and reference line.
 
 ---
 
+### Formal diagnostic tests for linear regression assumptions
+
+Graphical diagnostics are usually the first step,
+but formal tests can provide numerical summaries.
+
+For linear regression residuals,
+three common tests are:
+
+- `fligner.test()` for equal variances across groups
+  (the Fligner--Killeen test).
+- [Levene's test](https://en.wikipedia.org/wiki/Levene%27s_test)
+  (often implemented with a median-centered variant,
+  called the Brown--Forsythe test).
+- [Shapiro--Wilk test](https://en.wikipedia.org/wiki/Shapiro%E2%80%93Wilk_test)
+  for normality.
+
+---
+
+#### Fligner--Killeen test (homoskedasticity across groups)
+
+Suppose residuals are split into groups
+($g = 1, \ldots, G$),
+for example by a categorical predictor.
+
+The test starts from absolute deviations from each group median:
+$$
+d_{gi} = |e_{gi} - \text{median}(e_{g1}, \ldots, e_{gn_g})|.
+$$
+
+After ranking the pooled $d_{gi}$ values,
+the Fligner--Killeen statistic is built from normal scores of those ranks.
+
+Under the null hypothesis of equal variances,
+the test statistic approximately follows a $\chi^2_{G-1}$ distribution.
+Small p-values suggest heteroskedasticity.
+
+---
+
+#### Levene / Brown--Forsythe test (homoskedasticity across groups)
+
+Levene's test transforms residuals to within-group absolute deviations:
+$$
+z_{gi} = |e_{gi} - c_g|,
+$$
+where $c_g$ is the group center.
+
+Classical Levene uses the group mean for $c_g$.
+Brown--Forsythe uses the group median,
+which is more robust.
+
+Then run a one-way ANOVA on $z_{gi}$ by group, with $N = \sum_{g=1}^{G} n_g$ total observations:
+$$
+F = \frac{\text{MS}_{\text{between}}}{\text{MS}_{\text{within}}}
+\overset{\text{approx.}}{\sim} F_{G-1, N-G}
+\quad\text{under }H_0.
+$$
+
+Small p-values suggest unequal residual variance.
+
+---
+
+#### Shapiro--Wilk test (normality of standardized residuals)
+
+For ordered standardized residuals
+$r_{(1)} \le \cdots \le r_{(n)}$,
+the Shapiro--Wilk statistic is:
+$$
+W =
+\frac{\left(\sum_{i=1}^n a_i r_{(i)}\right)^2}
+{\sum_{i=1}^n (r_i - \bar r)^2},
+$$
+where $a_i$ are constants from normal-order-statistic moments.
+
+If residuals are Gaussian,
+$W$ tends to be close to 1.
+Small $W$ (and small p-value)
+indicates departure from normality.
+
+---
+
+#### Numerical example (`birthweight` interaction model)
+
+```{r}
+diag_bw <-
+  bw |>
+  mutate(
+    resid_lm2 = resid(bw_lm2),
+    std_resid_lm2 = rstandard(bw_lm2)
+  ) |>
+  select(sex, resid_lm2, std_resid_lm2)
+
+fligner_bw <- fligner.test(resid_lm2 ~ sex, data = diag_bw)
+
+levene_bw <-
+  diag_bw |>
+  group_by(sex) |>
+  mutate(
+    med_resid = median(resid_lm2),
+    abs_dev = abs(resid_lm2 - med_resid)
+  ) |>
+  ungroup()
+
+levene_fit <- aov(abs_dev ~ sex, data = levene_bw)
+levene_tab <- summary(levene_fit)[[1]]
+levene_F <- unname(levene_tab[1, "F value"])
+levene_p <- unname(levene_tab[1, "Pr(>F)"])
+
+shapiro_bw <- shapiro.test(diag_bw$std_resid_lm2)
+
+tibble(
+  test = c(
+    "Fligner-Killeen: equal variance by sex",
+    "Levene/Brown-Forsythe: equal variance by sex",
+    "Shapiro-Wilk: normality of standardized residuals"
+  ),
+  statistic = c(
+    unname(fligner_bw$statistic),
+    levene_F,
+    unname(shapiro_bw$statistic)
+  ),
+  p_value = c(
+    fligner_bw$p.value,
+    levene_p,
+    shapiro_bw$p.value
+  )
+) |>
+  mutate(
+    statistic = signif(statistic, 4),
+    p_value = signif(p_value, 4)
+  )
+```
+
+Interpretation rule:
+for all three tests,
+a small p-value is evidence against the corresponding model assumption.
+
+---
+
 ### Conditional distributions of residuals
 
 If our Gaussian linear regression model is correct, the residuals $e_i$ and standardized residuals $r_i$ should have:

From 5711f152d57cd24617d4fca147f845e657f4a2b6 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 16 Apr 2026 09:29:26 +0000
Subject: [PATCH 03/16] Address review feedback for diagnostic test summaries

Agent-Logs-Url: https://github.com/d-morrison/rme/sessions/7e0b4f05-0243-4783-b92c-ac0a48263e14

Co-authored-by: d-morrison <2474437+d-morrison@users.noreply.github.com>
---
 .../Linear-models-overview/_sec_linreg_diagnostics.qmd | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
index 87e5c91f8..1eed94246 100644
--- a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
+++ b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
@@ -921,10 +921,13 @@ three common tests are:
 
 - `fligner.test()` for equal variances across groups
   (the Fligner--Killeen test).
+- `car::leveneTest()` (or equivalent ANOVA on absolute deviations)
+  for Levene / Brown--Forsythe testing.
 - [Levene's test](https://en.wikipedia.org/wiki/Levene%27s_test)
   (often implemented with a median-centered variant,
   called the Brown--Forsythe test).
-- [Shapiro--Wilk test](https://en.wikipedia.org/wiki/Shapiro%E2%80%93Wilk_test)
+- `shapiro.test()` /
+  [Shapiro--Wilk test](https://en.wikipedia.org/wiki/Shapiro%E2%80%93Wilk_test)
   for normality.
 
 ---
@@ -983,6 +986,8 @@ W =
 {\sum_{i=1}^n (r_i - \bar r)^2},
 $$
 where $a_i$ are constants from normal-order-statistic moments.
+The numerator uses ordered residuals $r_{(i)}$,
+while the denominator uses the original (unordered) residuals.
 
 If residuals are Gaussian,
 $W$ tends to be close to 1.
@@ -994,6 +999,7 @@ indicates departure from normality.
 #### Numerical example (`birthweight` interaction model)
 
 ```{r}
+#| code-fold: false
 diag_bw <-
   bw |>
   mutate(
@@ -1022,7 +1028,7 @@ shapiro_bw <- shapiro.test(diag_bw$std_resid_lm2)
 
 tibble(
   test = c(
-    "Fligner-Killeen: equal variance by sex",
+    "Fligner--Killeen: equal variance by sex",
     "Levene/Brown-Forsythe: equal variance by sex",
     "Shapiro-Wilk: normality of standardized residuals"
   ),

From 5b85b509e3b864d2eb70543fac6e2fe67fca989a Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 16 Apr 2026 17:04:50 +0000
Subject: [PATCH 04/16] Compare formal tests with visual diagnostics

Agent-Logs-Url: https://github.com/d-morrison/rme/sessions/3826fdc4-d9e9-45b5-a478-e6b62c33a630

Co-authored-by: d-morrison <2474437+d-morrison@users.noreply.github.com>
---
 .../Linear-models-overview/_sec_linreg_diagnostics.qmd | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
index 1eed94246..83a4b1d1d 100644
--- a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
+++ b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
@@ -1053,6 +1053,16 @@ Interpretation rule:
 for all three tests,
 a small p-value is evidence against the corresponding model assumption.
 
+Compared with visual diagnostics:
+
+- Fligner--Killeen / Levene summarizes the same heteroskedasticity signal
+  that we inspect in residuals-vs-fitted and scale-location plots.
+- Shapiro--Wilk summarizes the same normality signal
+  that we inspect in QQ plots and residual histograms.
+- Use tests and plots together:
+  the tests provide a single numerical summary,
+  while the plots show the shape and practical size of departures.
+
 ---
 
 ### Conditional distributions of residuals

From 92bfe0c3d25256471161600c7de7528a74254c95 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 16 Apr 2026 17:06:44 +0000
Subject: [PATCH 05/16] Clarify comparison with visual diagnostics

Agent-Logs-Url: https://github.com/d-morrison/rme/sessions/3826fdc4-d9e9-45b5-a478-e6b62c33a630

Co-authored-by: d-morrison <2474437+d-morrison@users.noreply.github.com>
---
 _subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
index 83a4b1d1d..849e8afc2 100644
--- a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
+++ b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
@@ -1055,9 +1055,9 @@ a small p-value is evidence against the corresponding model assumption.
 
 Compared with visual diagnostics:
 
-- Fligner--Killeen / Levene summarizes the same heteroskedasticity signal
+- Fligner–Killeen / Levene summarizes the same heteroscedasticity signal
   that we inspect in residuals-vs-fitted and scale-location plots.
-- Shapiro--Wilk summarizes the same normality signal
+- Shapiro–Wilk summarizes the same normality signal
   that we inspect in QQ plots and residual histograms.
 - Use tests and plots together:
   the tests provide a single numerical summary,

From 69323b2d15f73c5ccd6eb5de6860e30cc161a1ed Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 16 Apr 2026 18:26:45 +0000
Subject: [PATCH 06/16] Add Brown-Forsythe test details from Kutner

Agent-Logs-Url: https://github.com/d-morrison/rme/sessions/43dcdf33-0784-475e-8d65-15e1ce35768c

Co-authored-by: d-morrison <2474437+d-morrison@users.noreply.github.com>
---
 .../_sec_linreg_diagnostics.qmd               | 28 +++++++++++++++----
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
index 849e8afc2..0bc198b3a 100644
--- a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
+++ b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
@@ -921,11 +921,9 @@ three common tests are:
 
 - `fligner.test()` for equal variances across groups
   (the Fligner--Killeen test).
-- `car::leveneTest()` (or equivalent ANOVA on absolute deviations)
-  for Levene / Brown--Forsythe testing.
-- [Levene's test](https://en.wikipedia.org/wiki/Levene%27s_test)
-  (often implemented with a median-centered variant,
-  called the Brown--Forsythe test).
+- Brown--Forsythe testing
+  (a robust median-centered Levene variant;
+  e.g., via `car::leveneTest(..., center = median)` or equivalent code).
 - `shapiro.test()` /
   [Shapiro--Wilk test](https://en.wikipedia.org/wiki/Shapiro%E2%80%93Wilk_test)
   for normality.
@@ -973,6 +971,24 @@ $$
 
 Small p-values suggest unequal residual variance.
 
+For simple linear regression,
+@kutner2005applied [pp. 116--117] describes
+the Brown--Forsythe test
+by splitting observations into two $X$-level groups
+(low versus high),
+computing absolute deviations from each group median,
+and applying a two-sample pooled-variance t test:
+$$
+t_{\text{BF}} =
+\frac{\bar z_{1} - \bar z_{2}}
+{s_p \sqrt{1/n_1 + 1/n_2}},
+\quad
+t_{\text{BF}} \approx t_{n_1+n_2-2}
+\text{ under }H_0.
+$$
+Large $|t_{\text{BF}}|$
+indicates nonconstant residual variance.
+
 ---
 
 #### Shapiro--Wilk test (normality of standardized residuals)
@@ -1029,7 +1045,7 @@ shapiro_bw <- shapiro.test(diag_bw$std_resid_lm2)
 tibble(
   test = c(
     "Fligner--Killeen: equal variance by sex",
-    "Levene/Brown-Forsythe: equal variance by sex",
+    "Levene/Brown--Forsythe: equal variance by sex",
     "Shapiro-Wilk: normality of standardized residuals"
   ),
   statistic = c(

From 6a227bb17fb34daf0f3e3097e5dde295c0d9f7f1 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 16 Apr 2026 18:32:07 +0000
Subject: [PATCH 07/16] Standardize Shapiro-Wilk label punctuation

Agent-Logs-Url: https://github.com/d-morrison/rme/sessions/43dcdf33-0784-475e-8d65-15e1ce35768c

Co-authored-by: d-morrison <2474437+d-morrison@users.noreply.github.com>
---
 _subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
index 0bc198b3a..cb53ab5d6 100644
--- a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
+++ b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
@@ -1046,7 +1046,7 @@ tibble(
   test = c(
     "Fligner--Killeen: equal variance by sex",
     "Levene/Brown--Forsythe: equal variance by sex",
-    "Shapiro-Wilk: normality of standardized residuals"
+    "Shapiro--Wilk: normality of standardized residuals"
   ),
   statistic = c(
     unname(fligner_bw$statistic),

From ef48cf35fce13d144a600e817b97403d6ef1b905 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 16 Apr 2026 18:37:08 +0000
Subject: [PATCH 08/16] Normalize Brown-Forsythe formula subscripts

Agent-Logs-Url: https://github.com/d-morrison/rme/sessions/43dcdf33-0784-475e-8d65-15e1ce35768c

Co-authored-by: d-morrison <2474437+d-morrison@users.noreply.github.com>
---
 _subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
index cb53ab5d6..f4e682d4c 100644
--- a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
+++ b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
@@ -981,9 +981,9 @@ and applying a two-sample pooled-variance t test:
 $$
 t_{\text{BF}} =
 \frac{\bar z_{1} - \bar z_{2}}
-{s_p \sqrt{1/n_1 + 1/n_2}},
+{s_p \sqrt{1/n_{1} + 1/n_{2}}},
 \quad
-t_{\text{BF}} \approx t_{n_1+n_2-2}
+t_{\text{BF}} \approx t_{n_{1}+n_{2}-2}
 \text{ under }H_0.
 $$
 Large $|t_{\text{BF}}|$

From 736fc7b21af6db54f7f3d08e1f21583e1abab9d2 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 16 Apr 2026 18:42:18 +0000
Subject: [PATCH 09/16] Define Brown-Forsythe formula terms explicitly

Agent-Logs-Url: https://github.com/d-morrison/rme/sessions/43dcdf33-0784-475e-8d65-15e1ce35768c

Co-authored-by: d-morrison <2474437+d-morrison@users.noreply.github.com>
---
 .../Linear-models-overview/_sec_linreg_diagnostics.qmd      | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
index f4e682d4c..495cb8aad 100644
--- a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
+++ b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
@@ -978,6 +978,12 @@ by splitting observations into two $X$-level groups
 (low versus high),
 computing absolute deviations from each group median,
 and applying a two-sample pooled-variance t test:
+
+Here,
+$\bar z_{1}$ and $\bar z_{2}$
+are the group means of those absolute deviations,
+$s_p$ is their pooled standard deviation,
+and $n_{1}, n_{2}$ are the two group sample sizes.
 $$
 t_{\text{BF}} =
 \frac{\bar z_{1} - \bar z_{2}}

From 54d25f2758200518416946d01e53f799053efcd5 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 16 Apr 2026 18:47:36 +0000
Subject: [PATCH 10/16] Define z_ij notation for Brown-Forsythe test

Agent-Logs-Url: https://github.com/d-morrison/rme/sessions/43dcdf33-0784-475e-8d65-15e1ce35768c

Co-authored-by: d-morrison <2474437+d-morrison@users.noreply.github.com>
---
 .../_sec_linreg_diagnostics.qmd                 | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
index 495cb8aad..e0ed4164a 100644
--- a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
+++ b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
@@ -978,12 +978,12 @@ by splitting observations into two $X$-level groups
 (low versus high),
 computing absolute deviations from each group median,
 and applying a two-sample pooled-variance t test:
-
-Here,
-$\bar z_{1}$ and $\bar z_{2}$
-are the group means of those absolute deviations,
-$s_p$ is their pooled standard deviation,
-and $n_{1}, n_{2}$ are the two group sample sizes.
+Let
+$$
+z_{ij} = |e_{ij} - \tilde e_i|,
+$$
+where $\tilde e_i$ is the median residual in group $i$.
+Then:
 $$
 t_{\text{BF}} =
 \frac{\bar z_{1} - \bar z_{2}}
@@ -992,6 +992,11 @@ t_{\text{BF}} =
 t_{\text{BF}} \approx t_{n_{1}+n_{2}-2}
 \text{ under }H_0.
 $$
+Here,
+$\bar z_{1}$ and $\bar z_{2}$
+are the group means of $z_{1j}$ and $z_{2j}$,
+$s_p$ is their pooled standard deviation,
+and $n_{1}, n_{2}$ are the two group sample sizes.
 Large $|t_{\text{BF}}|$
 indicates nonconstant residual variance.
 

From 54d35755d9f381bbdd08b9ee9cbdc93ae807bfba Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 16 Apr 2026 18:52:47 +0000
Subject: [PATCH 11/16] Clarify Brown-Forsythe notation and robustness wording

Agent-Logs-Url: https://github.com/d-morrison/rme/sessions/43dcdf33-0784-475e-8d65-15e1ce35768c

Co-authored-by: d-morrison <2474437+d-morrison@users.noreply.github.com>
---
 .../_sec_linreg_diagnostics.qmd                       | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
index e0ed4164a..ced8d5e08 100644
--- a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
+++ b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
@@ -922,7 +922,9 @@ three common tests are:
 - `fligner.test()` for equal variances across groups
   (the Fligner--Killeen test).
 - Brown--Forsythe testing
-  (a robust median-centered Levene variant;
+  (a median-centered Levene variant,
+  where standard Levene centers on group means,
+  and Brown--Forsythe centers on group medians for more robustness;
   e.g., via `car::leveneTest(..., center = median)` or equivalent code).
 - `shapiro.test()` /
   [Shapiro--Wilk test](https://en.wikipedia.org/wiki/Shapiro%E2%80%93Wilk_test)
@@ -982,7 +984,9 @@ let
 $$
 z_{ij} = |e_{ij} - \tilde e_i|,
 $$
-where $\tilde e_i$ is the median residual in group $i$.
+where
+$j$ indexes observations within group $i$,
+and $\tilde e_i$ is the median residual in group $i$.
 Then:
 $$
 t_{\text{BF}} =
@@ -994,7 +998,8 @@ t_{\text{BF}} \approx t_{n_{1}+n_{2}-2}
 $$
 Here,
 $\bar z_{1}$ and $\bar z_{2}$
-are the group means of $z_{1j}$ and $z_{2j}$,
+are the means of the $z_{ij}$ values
+in groups $i=1$ and $i=2$,
 $s_p$ is their pooled standard deviation,
 and $n_{1}, n_{2}$ are the two group sample sizes.
 Large $|t_{\text{BF}}|$

From 0a37a6dc8fccbe59c0564ed402aec1a8eec68df5 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 16 Apr 2026 19:17:29 +0000
Subject: [PATCH 12/16] Cross-reference visual diagnostics figures in test
 summary

Agent-Logs-Url: https://github.com/d-morrison/rme/sessions/212ae3bb-917c-4d4d-9707-a0091a9d08f7

Co-authored-by: d-morrison <2474437+d-morrison@users.noreply.github.com>
---
 .../Linear-models-overview/_sec_linreg_diagnostics.qmd      | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
index ced8d5e08..94db40347 100644
--- a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
+++ b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
@@ -1088,9 +1088,11 @@ a small p-value is evidence against the corresponding model assumption.
 Compared with visual diagnostics:
 
 - Fligner–Killeen / Levene summarizes the same heteroscedasticity signal
-  that we inspect in residuals-vs-fitted and scale-location plots.
+  that we inspect in residuals-vs-fitted (@fig-bw_lm2-resid-vs-fitted)
+  and scale-location (@fig-bw-scale-loc) plots.
 - Shapiro–Wilk summarizes the same normality signal
-  that we inspect in QQ plots and residual histograms.
+  that we inspect in QQ plots (@fig-qqplot-autoplot)
+  and standardized-residual histograms (@fig-marg-stresd).
 - Use tests and plots together:
   the tests provide a single numerical summary,
   while the plots show the shape and practical size of departures.

From ac37cf7dce8b81fe9005f155e3838d71af33996f Mon Sep 17 00:00:00 2001
From: Douglas Ezra Morrison <demorrison@ucdavis.edu>
Date: Mon, 20 Apr 2026 00:30:57 -0700
Subject: [PATCH 13/16] Update
 _subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 _subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
index 94db40347..dbc65e72c 100644
--- a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
+++ b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
@@ -940,7 +940,7 @@ for example by a categorical predictor.
 
 The test starts from absolute deviations from each group median:
 $$
-d_{gi} = |e_{gi} - \text{median}(e_{g1}, \ldots, e_{gn_g})|.
+d_{gi} = |e_{gi} - \text{median}(e_{g1}, \ldots, e_{g,n_g})|.
 $$
 
 After ranking the pooled $d_{gi}$ values,

From 0e5aabaf3b1e3d3f54023ea3be07a19eead156e3 Mon Sep 17 00:00:00 2001
From: Douglas Ezra Morrison <demorrison@ucdavis.edu>
Date: Mon, 27 Apr 2026 19:54:04 -0700
Subject: [PATCH 14/16] Apply suggestions from code review

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 _subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
index dbc65e72c..639338b64 100644
--- a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
+++ b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
@@ -1087,7 +1087,7 @@ a small p-value is evidence against the corresponding model assumption.
 
 Compared with visual diagnostics:
 
-- Fligner–Killeen / Levene summarizes the same heteroscedasticity signal
+- Fligner–Killeen / Levene summarizes the same heteroskedasticity signal
   that we inspect in residuals-vs-fitted (@fig-bw_lm2-resid-vs-fitted)
   and scale-location (@fig-bw-scale-loc) plots.
 - Shapiro–Wilk summarizes the same normality signal

From 8e6eb716f8391d3ffdf2374e5b4f26f04528f465 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 28 Apr 2026 03:03:29 +0000
Subject: [PATCH 15/16] Apply diagnostics review thread fixes

Agent-Logs-Url: https://github.com/d-morrison/rme/sessions/a245d4d3-0b77-4778-bc66-853c5f496480

Co-authored-by: d-morrison <2474437+d-morrison@users.noreply.github.com>
---
 .../Linear-models-overview/_sec_linreg_diagnostics.qmd      | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
index 5e17321be..fc0c14e46 100644
--- a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
+++ b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
@@ -1106,7 +1106,7 @@ three common tests are:
 
 - `fligner.test()` for equal variances across groups
   (the Fligner--Killeen test).
-- Brown--Forsythe testing
+- Levene / Brown--Forsythe test
   (a median-centered Levene variant,
   where standard Levene centers on group means,
   and Brown--Forsythe centers on group medians for more robustness;
@@ -1219,10 +1219,6 @@ indicates departure from normality.
 #| code-fold: false
 diag_bw <-
   bw |>
-  mutate(
-    resid_lm2 = resid(bw_lm2),
-    std_resid_lm2 = rstandard(bw_lm2)
-  ) |>
   select(sex, resid_lm2, std_resid_lm2)
 
 fligner_bw <- fligner.test(resid_lm2 ~ sex, data = diag_bw)

From 14b240dd7942403695d33c74e8f93bbc49aee4b2 Mon Sep 17 00:00:00 2001
From: Douglas Ezra Morrison <demorrison@ucdavis.edu>
Date: Mon, 27 Apr 2026 20:58:43 -0700
Subject: [PATCH 16/16] Update
 _subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 .../_sec_linreg_diagnostics.qmd                      | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
index fc0c14e46..c648b0e05 100644
--- a/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
+++ b/_subfiles/Linear-models-overview/_sec_linreg_diagnostics.qmd
@@ -1214,12 +1214,16 @@ indicates departure from normality.
 ---
 
 #### Numerical example (`birthweight` interaction model)
 
 ```{r}
 #| code-fold: false
 diag_bw <-
-  bw |>
-  select(sex, resid_lm2, std_resid_lm2)
+  broom::augment(bw_lm2, data = bw) |>
+  transmute(
+    sex,
+    resid_lm2 = .resid,
+    std_resid_lm2 = .std.resid
+  )
 
 fligner_bw <- fligner.test(resid_lm2 ~ sex, data = diag_bw)