Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ jobs:
config:
- {os: macos-latest, r: 'release'}
- {os: windows-latest, r: 'release'}
- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
- {os: ubuntu-latest, r: 'release'}

env:
Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: immGLIPH
Title: Grouping of Lymphocyte Interactions by Paratope Hotspots
Version: 0.99.4
Version: 0.99.5
Authors@R: c(
person("Nick", "Borcherding", role = c("aut", "cre"),
email = "ncborch@gmail.com")
Expand Down
13 changes: 13 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
# immGLIPH 0.99.5

* Vignette: removed undefined variable references (`mouse_tcr_data`,
`contig_list` placeholder, `...` in `custom_ref`). Five previously
unevaluated chunks now run during build, using `tempdir()`,
`scRepertoire::contig_list`, the bundled `gliph_sce`, and a real
reference data frame built from `gliph_input_data`. The three
remaining `eval = FALSE` chunks are limited to `BiocManager::install()`
calls and the optional `getGLIPHreference()` network download.
* Consolidated duplicated column-type coercion `for` loops in
`loadGLIPH()`, `plotNetwork()` and `clusterScoring()` into a single
`lapply()`-based helper, `.coerce_numeric_cols()`.

# immGLIPH 0.99.4

* Added a Validation section to the README with concordance metrics
Expand Down
8 changes: 1 addition & 7 deletions R/clusterScoring.R
Original file line number Diff line number Diff line change
Expand Up @@ -528,13 +528,7 @@ clusterScoring <- function(cluster_list,
}

# set all theoretical numeric values to numeric
if (is.data.frame(res)) {
for (i in seq_len(ncol(res))) {
if (!suppressWarnings(any(is.na(as.numeric(res[, i]))))) {
res[, i] <- as.numeric(res[, i])
}
}
}
res <- .coerce_numeric_cols(res)

return(res[, -1])
}
12 changes: 3 additions & 9 deletions R/loadGLIPH.R
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,7 @@ loadGLIPH <- function(result_folder = ""){
fname <- paste0(result_folder, "cluster_member_details.txt")
if(file.exists(fname)){
cluster_list <- utils::read.table(file = fname,sep = "\t",quote = "", header = TRUE, stringsAsFactors = FALSE)
for(i in seq_len(ncol(cluster_list))){
if(suppressWarnings(any(is.na(as.numeric(cluster_list[,i])))) == FALSE) cluster_list[,i] <- as.numeric(cluster_list[,i])
}
cluster_list <- .coerce_numeric_cols(cluster_list)
tag_names <- unique(cluster_list$tag)
cluster_list <- lapply(tag_names, function(x){return(cluster_list[cluster_list$tag == x,-1])})
names(cluster_list) <- tag_names
Expand Down Expand Up @@ -184,9 +182,7 @@ loadGLIPH <- function(result_folder = ""){
fname <- paste0(result_folder, "cluster_member_details.txt")
if(file.exists(fname)){
cluster_list <- utils::read.table(file = fname,sep = "\t",quote = "", header = TRUE, stringsAsFactors = FALSE)
for(i in seq_len(ncol(cluster_list))){
if(suppressWarnings(any(is.na(as.numeric(cluster_list[,i])))) == FALSE) cluster_list[,i] <- as.numeric(cluster_list[,i])
}
cluster_list <- .coerce_numeric_cols(cluster_list)
tag_names <- unique(cluster_list$tag)
cluster_list <- lapply(tag_names, function(x){return(cluster_list[cluster_list$tag == x,-1])})
names(cluster_list) <- tag_names
Expand Down Expand Up @@ -246,9 +242,7 @@ loadGLIPH <- function(result_folder = ""){
fname <- paste0(result_folder, "cluster_member_details.txt")
if(file.exists(fname)){
cluster_list <- utils::read.table(file = fname,sep = "\t",quote = "", header = TRUE, stringsAsFactors = FALSE)
for(i in seq_len(ncol(cluster_list))){
if(suppressWarnings(any(is.na(as.numeric(cluster_list[,i])))) == FALSE) cluster_list[,i] <- as.numeric(cluster_list[,i])
}
cluster_list <- .coerce_numeric_cols(cluster_list)
tag_names <- unique(cluster_list$tag)
cluster_list <- lapply(tag_names, function(x){return(cluster_list[cluster_list$tag == x,-1])})
names(cluster_list) <- tag_names
Expand Down
4 changes: 1 addition & 3 deletions R/plotNetwork.R
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,7 @@ plotNetwork <- function(clustering_output = NULL,
}, BPPARAM = BPPARAM))
cluster_data_frame <- data.frame(cluster_data_frame, stringsAsFactors = FALSE)
cluster_data_frame[] <- lapply(cluster_data_frame, as.character)
for(i in seq_len(ncol(cluster_data_frame))){
if(suppressWarnings(any(is.na(as.numeric(cluster_data_frame[,i])))) == FALSE) cluster_data_frame[,i] <- as.numeric(cluster_data_frame[,i])
}
cluster_data_frame <- .coerce_numeric_cols(cluster_data_frame)
cluster_data_frame$ID <- seq_len(nrow(cluster_data_frame))

# For better visualization insert a line break (<br>) between significant HLA alleles in the same cluster
Expand Down
10 changes: 4 additions & 6 deletions R/utils-output.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,10 @@
#' @keywords internal
.coerce_numeric_cols <- function(df) {
  # Convert every column of `df` whose values can all be parsed as numbers
  # to numeric, and leave every other column exactly as it was.
  #
  # df: a data frame. Any other object is returned unchanged.
  # Returns `df` with the same columns, in the same order.
  if (!is.data.frame(df)) return(df)
  # A single pass over the columns is enough; `df[] <-` keeps the data
  # frame's names and attributes while replacing the column contents.
  df[] <- lapply(df, function(col) {
    # as.numeric() on a factor yields the internal level codes rather than
    # the labels, so parse the labels instead.
    src <- if (is.factor(col)) as.character(col) else col
    # Values that cannot be parsed become NA (the warning is silenced here).
    vals <- suppressWarnings(as.numeric(src))
    # Any NA means a value failed to parse or was already missing, so the
    # original column is kept untouched.
    if (anyNA(vals)) col else vals
  })
  df
}

Expand Down
83 changes: 49 additions & 34 deletions vignettes/immGLIPH.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -125,31 +125,46 @@ Alternative column names are automatically recognized (e.g., `cdr3`, `v_gene`,
When working with single-cell immune repertoire data, you can use
scRepertoire to prepare your data and pass the output directly to immGLIPH.

```{r eval=FALSE}
```{r}
library(scRepertoire)
data("contig_list")

# After processing with cellranger/etc, combine contigs
combined <- combineTCR(contig_list[1:2],
samples = c("P1", "P2"))
combined <- combineTCR(contig_list[seq_len(2)],
samples = c("P1", "P2"))

# Take a small slice so the example runs quickly. In real use, pass
# all samples and rely on the bundled reference downloaded by
# getGLIPHreference().
combined_small <- lapply(combined, function(x) x[seq_len(50), ])

# Use a small custom reference built from the bundled example data
small_ref <- gliph_input_data[, c("CDR3b", "TRBV")]

# Pass scRepertoire output directly to runGLIPH
results <- runGLIPH(combined,
method = "gliph2")
results_sc <- runGLIPH(combined_small,
method = "gliph2",
refdb_beta = small_ref,
sim_depth = 100,
n_cores = 1)
```

For **SingleCellExperiment** objects that already contain TCR metadata (e.g.,
added via `scRepertoire::combineExpression()`), immGLIPH extracts the receptor
data automatically using `immApex::getIR()`. Here is an example using the
bundled `gliph_sce` dataset:

```{r eval=FALSE}
```{r}
library(SingleCellExperiment)
data("gliph_sce")

# SingleCellExperiment object with TCR info in colData
results <- runGLIPH(gliph_sce,
method = "gliph2",
chains = "TRB")
results_sce <- runGLIPH(gliph_sce,
method = "gliph2",
chains = "TRB",
refdb_beta = small_ref,
sim_depth = 100,
n_cores = 1)
```

# The `runGLIPH()` Function
Expand Down Expand Up @@ -331,29 +346,24 @@ the original GLIPH publications. Each is available as CD4, CD8, or combined
Each reference includes pre-computed V-gene usage and CDR3 length frequency
distributions, which are automatically used for cluster scoring.

```{r eval=FALSE}
# Use the GLIPH2 mouse reference
res_mouse <- runGLIPH(
cdr3_sequences = mouse_tcr_data,
refdb_beta = "mouse_v1.0_CD48",
method = "gliph2",
sim_depth = 100,
n_cores = 1
)
```
To analyse mouse data, supply mouse CDR3$\beta$ sequences as
`cdr3_sequences` and set `refdb_beta = "mouse_v1.0_CD48"` (or one of the
CD4/CD8 subsets in the table). The reference is fetched and cached the
first time it is requested.

## Using a Custom Reference Database

You can also supply your own reference as a data frame:
You can also supply your own reference as a data frame. A minimal
reference is a two-column table of CDR3$\beta$ amino-acid sequences and
their corresponding V-gene names. Here we build a small one from the
bundled `gliph_input_data` for illustration:

```{r eval=FALSE}
custom_ref <- data.frame(
CDR3b = c("CASSLAPGATNEKLFF", "CASSLDRGEVFF", ...),
TRBV = c("TRBV5-1", "TRBV6-2", ...)
)
```{r}
custom_ref <- gliph_input_data[, c("CDR3b", "TRBV")]
head(custom_ref, 3)

res <- runGLIPH(
cdr3_sequences = gliph_input_data[seq_len(200), ],
cdr3_sequences = gliph_input_data[seq_len(100), ],
refdb_beta = custom_ref,
method = "gliph2",
sim_depth = 100,
Expand Down Expand Up @@ -524,19 +534,24 @@ if (!is.null(res_gliph1$cluster_properties) &&

# Loading Saved Results with `loadGLIPH()`

If you saved results to disk using `result_folder`, you can reload them:
If you save results to disk using `result_folder`, you can reload them
later. We use `tempdir()` here so the example does not write to your
working directory:

```{r eval=FALSE}
# Save results
res <- runGLIPH(
cdr3_sequences = gliph_input_data,
```{r}
out_dir <- file.path(tempdir(), "gliph_results")

res_saved <- runGLIPH(
cdr3_sequences = gliph_input_data[seq_len(200), ],
method = "gliph2",
result_folder = "my_results/",
refdb_beta = ref_df,
result_folder = out_dir,
sim_depth = 100,
n_cores = 1
)

# Later, reload
reloaded <- loadGLIPH(result_folder = "my_results/")
reloaded <- loadGLIPH(result_folder = out_dir)
names(reloaded)
```

# Saving Results to Disk
Expand Down
Loading