BorchLab · ncborcherding · May 20, 2026 · May 20, 2026 · May 20, 2026 · May 20, 2026
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -21,7 +21,6 @@ jobs:
         config:
           - {os: macos-latest,   r: 'release'}
           - {os: windows-latest, r: 'release'}
-          - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
           - {os: ubuntu-latest,   r: 'release'}
 
     env:

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: immGLIPH
 Title: Grouping of Lymphocyte Interactions by Paratope Hotspots
-Version: 0.99.4
+Version: 0.99.5
 Authors@R: c(
     person("Nick", "Borcherding", role = c("aut", "cre"),
            email = "ncborch@gmail.com")

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,16 @@
+# immGLIPH 0.99.5
+
+* Vignette: removed undefined variable references (`mouse_tcr_data`,
+  `contig_list` placeholder, `...` in `custom_ref`). Five previously
+  unevaluated chunks now run during build, using `tempdir()`,
+  `scRepertoire::contig_list`, the bundled `gliph_sce`, and a real
+  reference data frame built from `gliph_input_data`. The three
+  remaining `eval = FALSE` chunks are limited to `BiocManager::install()`
+  calls and the optional `getGLIPHreference()` network download.
+* Consolidated duplicated column-type coercion `for` loops in
+  `loadGLIPH()`, `plotNetwork()` and `clusterScoring()` into a single
+  `lapply()`-based helper, `.coerce_numeric_cols()`.
+
 # immGLIPH 0.99.4
 
 * Added a Validation section to the README with concordance metrics

diff --git a/R/clusterScoring.R b/R/clusterScoring.R
@@ -528,13 +528,7 @@ clusterScoring <- function(cluster_list,
   }
 
   # set all theoretical numeric values to numeric
-  if (is.data.frame(res)) {
-    for (i in seq_len(ncol(res))) {
-      if (!suppressWarnings(any(is.na(as.numeric(res[, i]))))) {
-        res[, i] <- as.numeric(res[, i])
-      }
-    }
-  }
+  res <- .coerce_numeric_cols(res)
 
   return(res[, -1])
 }
diff --git a/R/loadGLIPH.R b/R/loadGLIPH.R
@@ -120,9 +120,7 @@ loadGLIPH <- function(result_folder = ""){
     fname <- paste0(result_folder, "cluster_member_details.txt")
     if(file.exists(fname)){
       cluster_list <- utils::read.table(file = fname,sep = "\t",quote = "", header = TRUE, stringsAsFactors = FALSE)
-      for(i in seq_len(ncol(cluster_list))){
-        if(suppressWarnings(any(is.na(as.numeric(cluster_list[,i])))) == FALSE) cluster_list[,i] <- as.numeric(cluster_list[,i])
-      }
+      cluster_list <- .coerce_numeric_cols(cluster_list)
       tag_names <- unique(cluster_list$tag)
       cluster_list <- lapply(tag_names, function(x){return(cluster_list[cluster_list$tag == x,-1])})
       names(cluster_list) <- tag_names
@@ -184,9 +182,7 @@ loadGLIPH <- function(result_folder = ""){
       fname <- paste0(result_folder, "cluster_member_details.txt")
       if(file.exists(fname)){
         cluster_list <- utils::read.table(file = fname,sep = "\t",quote = "", header = TRUE, stringsAsFactors = FALSE)
-        for(i in seq_len(ncol(cluster_list))){
-          if(suppressWarnings(any(is.na(as.numeric(cluster_list[,i])))) == FALSE) cluster_list[,i] <- as.numeric(cluster_list[,i])
-        }
+        cluster_list <- .coerce_numeric_cols(cluster_list)
         tag_names <- unique(cluster_list$tag)
         cluster_list <- lapply(tag_names, function(x){return(cluster_list[cluster_list$tag == x,-1])})
         names(cluster_list) <- tag_names
@@ -246,9 +242,7 @@ loadGLIPH <- function(result_folder = ""){
       fname <- paste0(result_folder, "cluster_member_details.txt")
       if(file.exists(fname)){
         cluster_list <- utils::read.table(file = fname,sep = "\t",quote = "", header = TRUE, stringsAsFactors = FALSE)
-        for(i in seq_len(ncol(cluster_list))){
-          if(suppressWarnings(any(is.na(as.numeric(cluster_list[,i])))) == FALSE) cluster_list[,i] <- as.numeric(cluster_list[,i])
-        }
+        cluster_list <- .coerce_numeric_cols(cluster_list)
         tag_names <- unique(cluster_list$tag)
         cluster_list <- lapply(tag_names, function(x){return(cluster_list[cluster_list$tag == x,-1])})
         names(cluster_list) <- tag_names

diff --git a/R/plotNetwork.R b/R/plotNetwork.R
@@ -159,9 +159,7 @@ plotNetwork <- function(clustering_output = NULL,
   }, BPPARAM = BPPARAM))
   cluster_data_frame <- data.frame(cluster_data_frame, stringsAsFactors = FALSE)
   cluster_data_frame[] <- lapply(cluster_data_frame, as.character)
-  for(i in seq_len(ncol(cluster_data_frame))){
-    if(suppressWarnings(any(is.na(as.numeric(cluster_data_frame[,i])))) == FALSE) cluster_data_frame[,i] <- as.numeric(cluster_data_frame[,i])
-  }
+  cluster_data_frame <- .coerce_numeric_cols(cluster_data_frame)
   cluster_data_frame$ID <- seq_len(nrow(cluster_data_frame))
 
   # For better visualization insert a line break (<br>) between significant HLA alleles in the same cluster

diff --git a/R/utils-output.R b/R/utils-output.R
@@ -5,12 +5,10 @@
 #' @keywords internal
 .coerce_numeric_cols <- function(df) {
     if (!is.data.frame(df)) return(df)
-    for (i in seq_len(ncol(df))) {
-        vals <- suppressWarnings(as.numeric(df[, i]))
-        if (!any(is.na(vals))) {
-            df[, i] <- vals
-        }
-    }
+    df[] <- lapply(df, function(col) {  # df[] <- keeps df a data.frame with its column names
+        vals <- suppressWarnings(as.numeric(col))  # values that fail to convert become NA (warning muted)
+        if (anyNA(vals)) col else vals  # any NA, failed or pre-existing, leaves the column unchanged
+    })  # NOTE(review): as.numeric() on a factor yields its integer codes; confirm callers pass no factors
     df
 }
 

diff --git a/vignettes/immGLIPH.Rmd b/vignettes/immGLIPH.Rmd
@@ -125,31 +125,46 @@ Alternative column names are automatically recognized (e.g., `cdr3`, `v_gene`,
 When working with single-cell immune repertoire data, you can use
 scRepertoire to prepare your data and pass the output directly to immGLIPH.
 
-```{r eval=FALSE}
+```{r}
 library(scRepertoire)
+data("contig_list")
 
 # After processing with cellranger/etc, combine contigs
-combined <- combineTCR(contig_list[1:2],
-                        samples = c("P1", "P2"))
+combined <- combineTCR(contig_list[seq_len(2)],
+                       samples = c("P1", "P2"))
+
+# Take a small slice so the example runs quickly. In real use, pass
+# all samples and rely on the full reference fetched by
+# getGLIPHreference().
+combined_small <- lapply(combined, function(x) x[seq_len(50), ])
+
+# Use a small custom reference built from the bundled example data
+small_ref <- gliph_input_data[, c("CDR3b", "TRBV")]
 
 # Pass scRepertoire output directly to runGLIPH
-results <- runGLIPH(combined, 
-                    method = "gliph2")
+results_sc <- runGLIPH(combined_small,
+                       method     = "gliph2",
+                       refdb_beta = small_ref,
+                       sim_depth  = 100,
+                       n_cores    = 1)
 ```
 
 For **SingleCellExperiment** objects that already contain TCR metadata (e.g.,
 added via `scRepertoire::combineExpression()`), immGLIPH extracts the receptor
 data automatically using `immApex::getIR()`. Here is an example using the
 bundled `gliph_sce` dataset:
 
-```{r eval=FALSE}
+```{r}
 library(SingleCellExperiment)
 data("gliph_sce")
 
 # SingleCellExperiment object with TCR info in colData
-results <- runGLIPH(gliph_sce, 
-                    method = "gliph2", 
-                    chains = "TRB")
+results_sce <- runGLIPH(gliph_sce,
+                        method     = "gliph2",
+                        chains     = "TRB",
+                        refdb_beta = small_ref,
+                        sim_depth  = 100,
+                        n_cores    = 1)
 ```
 
 # The `runGLIPH()` Function
@@ -331,29 +346,24 @@ the original GLIPH publications. Each is available as CD4, CD8, or combined
 Each reference includes pre-computed V-gene usage and CDR3 length frequency
 distributions, which are automatically used for cluster scoring.
 
-```{r eval=FALSE}
-# Use the GLIPH2 mouse reference
-res_mouse <- runGLIPH(
-  cdr3_sequences = mouse_tcr_data,
-  refdb_beta     = "mouse_v1.0_CD48",
-  method         = "gliph2",
-  sim_depth      = 100,
-  n_cores        = 1
-)
-```
+To analyse mouse data, supply mouse CDR3$\beta$ sequences as
+`cdr3_sequences` and set `refdb_beta = "mouse_v1.0_CD48"` (or one of the
+CD4/CD8 subsets in the table). The reference is fetched and cached the
+first time it is requested.
 
 ## Using a Custom Reference Database
 
-You can also supply your own reference as a data frame:
+You can also supply your own reference as a data frame. A minimal
+reference is a two-column table of CDR3$\beta$ amino-acid sequences and
+their corresponding V-gene names. Here we build a small one from the
+bundled `gliph_input_data` for illustration:
 
-```{r eval=FALSE}
-custom_ref <- data.frame(
-  CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF", ...),
-  TRBV  = c("TRBV5-1", "TRBV6-2", ...)
-)
+```{r}
+custom_ref <- gliph_input_data[, c("CDR3b", "TRBV")]
+head(custom_ref, 3)
 
 res <- runGLIPH(
-  cdr3_sequences = gliph_input_data[seq_len(200), ],
+  cdr3_sequences = gliph_input_data[seq_len(100), ],
   refdb_beta     = custom_ref,
   method         = "gliph2",
   sim_depth      = 100,
@@ -524,19 +534,24 @@ if (!is.null(res_gliph1$cluster_properties) &&
 
 # Loading Saved Results with `loadGLIPH()`
 
-If you saved results to disk using `result_folder`, you can reload them:
+If you save results to disk using `result_folder`, you can reload them
+later. We use `tempdir()` here so the example does not write to your
+working directory:
 
-```{r eval=FALSE}
-# Save results
-res <- runGLIPH(
-  cdr3_sequences = gliph_input_data,
+```{r}
+out_dir <- file.path(tempdir(), "gliph_results")
+
+res_saved <- runGLIPH(
+  cdr3_sequences = gliph_input_data[seq_len(200), ],
   method         = "gliph2",
-  result_folder  = "my_results/",
+  refdb_beta     = ref_df,
+  result_folder  = out_dir,
+  sim_depth      = 100,
   n_cores        = 1
 )
 
-# Later, reload
-reloaded <- loadGLIPH(result_folder = "my_results/")
+reloaded <- loadGLIPH(result_folder = out_dir)
+names(reloaded)
 ```
 
 # Saving Results to Disk