diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 0d8bcc6..9647989 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -21,7 +21,6 @@ jobs: config: - {os: macos-latest, r: 'release'} - {os: windows-latest, r: 'release'} - - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - {os: ubuntu-latest, r: 'release'} env: diff --git a/DESCRIPTION b/DESCRIPTION index 8dc424d..d5b69c8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: immGLIPH Title: Grouping of Lymphocyte Interactions by Paratope Hotspots -Version: 0.99.4 +Version: 0.99.5 Authors@R: c( person("Nick", "Borcherding", role = c("aut", "cre"), email = "ncborch@gmail.com") diff --git a/NEWS.md b/NEWS.md index 0200bc0..de23d73 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,16 @@ +# immGLIPH 0.99.5 + +* Vignette: removed undefined variable references (`mouse_tcr_data`, + `contig_list` placeholder, `...` in `custom_ref`). Five previously + unevaluated chunks now run during build, using `tempdir()`, + `scRepertoire::contig_list`, the bundled `gliph_sce`, and a real + reference data frame built from `gliph_input_data`. The three + remaining `eval = FALSE` chunks are limited to `BiocManager::install()` + calls and the optional `getGLIPHreference()` network download. +* Consolidated duplicated column-type coercion `for` loops in + `loadGLIPH()`, `plotNetwork()` and `clusterScoring()` into a single + `lapply()`-based helper, `.coerce_numeric_cols()`. + # immGLIPH 0.99.4 * Added a Validation section to the README with concordance metrics diff --git a/R/clusterScoring.R b/R/clusterScoring.R index 092552c..5bdd5d3 100644 --- a/R/clusterScoring.R +++ b/R/clusterScoring.R @@ -528,13 +528,7 @@ clusterScoring <- function(cluster_list, } # set all theoretical numeric values to numeric - if (is.data.frame(res)) { - for (i in seq_len(ncol(res))) { - if (!suppressWarnings(any(is.na(as.numeric(res[, i]))))) { - res[, i] <- as.numeric(res[, i]) - } - } - } + res <- .coerce_numeric_cols(res) return(res[, -1]) } diff --git a/R/loadGLIPH.R b/R/loadGLIPH.R index 177ddf5..8e5eb8a 100644 --- a/R/loadGLIPH.R +++ b/R/loadGLIPH.R @@ -120,9 +120,7 @@ loadGLIPH <- function(result_folder = ""){ fname <- paste0(result_folder, "cluster_member_details.txt") if(file.exists(fname)){ cluster_list <- utils::read.table(file = fname,sep = "\t",quote = "", header = TRUE, stringsAsFactors = FALSE) - for(i in seq_len(ncol(cluster_list))){ - if(suppressWarnings(any(is.na(as.numeric(cluster_list[,i])))) == FALSE) cluster_list[,i] <- as.numeric(cluster_list[,i]) - } + cluster_list <- .coerce_numeric_cols(cluster_list) tag_names <- unique(cluster_list$tag) cluster_list <- lapply(tag_names, function(x){return(cluster_list[cluster_list$tag == x,-1])}) names(cluster_list) <- tag_names @@ -184,9 +182,7 @@ loadGLIPH <- function(result_folder = ""){ fname <- paste0(result_folder, "cluster_member_details.txt") if(file.exists(fname)){ cluster_list <- utils::read.table(file = fname,sep = "\t",quote = "", header = TRUE, stringsAsFactors = FALSE) - for(i in seq_len(ncol(cluster_list))){ - if(suppressWarnings(any(is.na(as.numeric(cluster_list[,i])))) == FALSE) cluster_list[,i] <- as.numeric(cluster_list[,i]) - } + cluster_list <- .coerce_numeric_cols(cluster_list) tag_names <- unique(cluster_list$tag) cluster_list <- lapply(tag_names, function(x){return(cluster_list[cluster_list$tag == x,-1])}) names(cluster_list) <- tag_names @@ -246,9 +242,7 @@ loadGLIPH <- function(result_folder = ""){ fname <- paste0(result_folder, "cluster_member_details.txt") if(file.exists(fname)){ cluster_list <- utils::read.table(file = fname,sep = "\t",quote = "", header = TRUE, stringsAsFactors = FALSE) - for(i in seq_len(ncol(cluster_list))){ - if(suppressWarnings(any(is.na(as.numeric(cluster_list[,i])))) == FALSE) cluster_list[,i] <- as.numeric(cluster_list[,i]) - } + cluster_list <- .coerce_numeric_cols(cluster_list) tag_names <- unique(cluster_list$tag) cluster_list <- lapply(tag_names, function(x){return(cluster_list[cluster_list$tag == x,-1])}) names(cluster_list) <- tag_names diff --git a/R/plotNetwork.R b/R/plotNetwork.R index 6e88fc9..1d85cdb 100644 --- a/R/plotNetwork.R +++ b/R/plotNetwork.R @@ -159,9 +159,7 @@ plotNetwork <- function(clustering_output = NULL, }, BPPARAM = BPPARAM)) cluster_data_frame <- data.frame(cluster_data_frame, stringsAsFactors = FALSE) cluster_data_frame[] <- lapply(cluster_data_frame, as.character) - for(i in seq_len(ncol(cluster_data_frame))){ - if(suppressWarnings(any(is.na(as.numeric(cluster_data_frame[,i])))) == FALSE) cluster_data_frame[,i] <- as.numeric(cluster_data_frame[,i]) - } + cluster_data_frame <- .coerce_numeric_cols(cluster_data_frame) cluster_data_frame$ID <- seq_len(nrow(cluster_data_frame)) # For better visualization insert a line break (
) between significant HLA alleles in the same cluster diff --git a/R/utils-output.R b/R/utils-output.R index be5fdad..5f97e11 100644 --- a/R/utils-output.R +++ b/R/utils-output.R @@ -5,12 +5,10 @@ #' @keywords internal .coerce_numeric_cols <- function(df) { if (!is.data.frame(df)) return(df) - for (i in seq_len(ncol(df))) { - vals <- suppressWarnings(as.numeric(df[, i])) - if (!any(is.na(vals))) { - df[, i] <- vals - } - } + df[] <- lapply(df, function(col) { + vals <- suppressWarnings(as.numeric(col)) + if (anyNA(vals)) col else vals + }) df } diff --git a/vignettes/immGLIPH.Rmd b/vignettes/immGLIPH.Rmd index fc5d89a..a78e02c 100644 --- a/vignettes/immGLIPH.Rmd +++ b/vignettes/immGLIPH.Rmd @@ -125,16 +125,28 @@ Alternative column names are automatically recognized (e.g., `cdr3`, `v_gene`, When working with single-cell immune repertoire data, you can use scRepertoire to prepare your data and pass the output directly to immGLIPH. -```{r eval=FALSE} +```{r} library(scRepertoire) +data("contig_list") # After processing with cellranger/etc, combine contigs -combined <- combineTCR(contig_list[1:2], - samples = c("P1", "P2")) +combined <- combineTCR(contig_list[seq_len(2)], + samples = c("P1", "P2")) + +# Take a small slice so the example runs quickly. In real use, pass +# all samples and rely on the bundled reference downloaded by +# getGLIPHreference(). +combined_small <- lapply(combined, function(x) x[seq_len(50), ]) + +# Use a small custom reference built from the bundled example data +small_ref <- gliph_input_data[, c("CDR3b", "TRBV")] # Pass scRepertoire output directly to runGLIPH -results <- runGLIPH(combined, - method = "gliph2") +results_sc <- runGLIPH(combined_small, + method = "gliph2", + refdb_beta = small_ref, + sim_depth = 100, + n_cores = 1) ``` For **SingleCellExperiment** objects that already contain TCR metadata (e.g., @@ -142,14 +154,17 @@ added via `scRepertoire::combineExpression()`), immGLIPH extracts the receptor data automatically using `immApex::getIR()`. Here is an example using the bundled `gliph_sce` dataset: -```{r eval=FALSE} +```{r} library(SingleCellExperiment) data("gliph_sce") # SingleCellExperiment object with TCR info in colData -results <- runGLIPH(gliph_sce, - method = "gliph2", - chains = "TRB") +results_sce <- runGLIPH(gliph_sce, + method = "gliph2", + chains = "TRB", + refdb_beta = small_ref, + sim_depth = 100, + n_cores = 1) ``` # The `runGLIPH()` Function @@ -331,29 +346,24 @@ the original GLIPH publications. Each is available as CD4, CD8, or combined Each reference includes pre-computed V-gene usage and CDR3 length frequency distributions, which are automatically used for cluster scoring. -```{r eval=FALSE} -# Use the GLIPH2 mouse reference -res_mouse <- runGLIPH( - cdr3_sequences = mouse_tcr_data, - refdb_beta = "mouse_v1.0_CD48", - method = "gliph2", - sim_depth = 100, - n_cores = 1 -) -``` +To analyse mouse data, supply mouse CDR3$\beta$ sequences as +`cdr3_sequences` and set `refdb_beta = "mouse_v1.0_CD48"` (or one of the +CD4/CD8 subsets in the table). The reference is fetched and cached the +first time it is requested. ## Using a Custom Reference Database -You can also supply your own reference as a data frame: +You can also supply your own reference as a data frame. A minimal +reference is a two-column table of CDR3$\beta$ amino-acid sequences and +their corresponding V-gene names. Here we build a small one from the +bundled `gliph_input_data` for illustration: -```{r eval=FALSE} -custom_ref <- data.frame( - CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF", ...), - TRBV = c("TRBV5-1", "TRBV6-2", ...) -) +```{r} +custom_ref <- gliph_input_data[, c("CDR3b", "TRBV")] +head(custom_ref, 3) res <- runGLIPH( - cdr3_sequences = gliph_input_data[seq_len(200), ], + cdr3_sequences = gliph_input_data[seq_len(100), ], refdb_beta = custom_ref, method = "gliph2", sim_depth = 100, @@ -524,19 +534,24 @@ if (!is.null(res_gliph1$cluster_properties) && # Loading Saved Results with `loadGLIPH()` -If you saved results to disk using `result_folder`, you can reload them: +If you save results to disk using `result_folder`, you can reload them +later. We use `tempdir()` here so the example does not write to your +working directory: -```{r eval=FALSE} -# Save results -res <- runGLIPH( - cdr3_sequences = gliph_input_data, +```{r} +out_dir <- file.path(tempdir(), "gliph_results") + +res_saved <- runGLIPH( + cdr3_sequences = gliph_input_data[seq_len(200), ], method = "gliph2", - result_folder = "my_results/", + refdb_beta = ref_df, + result_folder = out_dir, + sim_depth = 100, n_cores = 1 ) -# Later, reload -reloaded <- loadGLIPH(result_folder = "my_results/") +reloaded <- loadGLIPH(result_folder = out_dir) +names(reloaded) ``` # Saving Results to Disk