From 07255586660f198f0f38d5d3a07d04bedd24638a Mon Sep 17 00:00:00 2001 From: spearmangillman Date: Wed, 1 Apr 2026 16:01:04 -0300 Subject: [PATCH 01/18] add Linux compatibility for MaxEnt model fitting - Replace Windows batch file approach with direct system2("java") invocation in new runMaxent() helper; works cross-platform, no .bat file needed. - Update checkJava() to resolve Java via JAVA_HOME and common install directories when SyncroSim's minimal PATH does not include Java, patching the R session PATH so all subsequent calls succeed. - Remove fsep = "\\" from file.path() calls used by R file operations. - Clean up section organization in 08-fit-model-functions.R. --- src/08-fit-model-functions.R | 513 ++++++++++++++--------------------- src/8-fit-maxent.R | 22 +- 2 files changed, 209 insertions(+), 326 deletions(-) diff --git a/src/08-fit-model-functions.R b/src/08-fit-model-functions.R index d929547..2603a62 100644 --- a/src/08-fit-model-functions.R +++ b/src/08-fit-model-functions.R @@ -9,7 +9,7 @@ library(ROCR) library(ggplot2) library(splines) -# MODEL FIT FUNCTION ----------------------------------------------------------- +# Model Fit Functions ---------------------------------------------------------- ## Fit model ------------------------------------------------------------------- @@ -52,7 +52,7 @@ fitModel <- function( #================================================================ # GLM - #================================================================= + #================================================================ if (out$modType == "glm") { if (out$pseudoAbs) { @@ -190,7 +190,7 @@ fitModel <- function( #================================================================ # RF - #================================================================= + #================================================================ if (out$modType == "rf") { # set defaults n.trees = out$modOptions$NumberOfTrees @@ -286,294 +286,34 @@ fitModel <- function( #================================================================ # MAXENT - #================================================================= + #================================================================ if (out$modType == "maxent") { - # If there are parentheses in the working folder name, then the jar file will not run properly - validTempPath <- sum(grepl("\\(", strsplit(out$tempDir, "")[[1]])) + - sum(grepl("\\)", strsplit(out$tempDir, "")[[1]])) == - 0 - - if (!validTempPath) { - stop(paste0( - "Maxent model will not run if there are parentheses in the", - " temporary directory path. Please set a different ", - "temporary folder before continuing." - )) - } - if (fullFit) { - # Prepare batch file ---- - capture.output( - cat("java -mx", out$modOptions$MemoryLimit, "m", sep = ""), - file = out$batchPath - ) - - # core executable - cat( - " -jar", - paste0( - '"', - file.path( - ssimEnvironment()$PackageDirectory, - "maxent.jar", - fsep = "\\" - ), - '"' - ), - file = out$batchPath, - append = T - ) - # optional visible interface - if (!out$modOptions$VisibleInterface) { - cat(" -z", file = out$batchPath, append = T) - } - - # input files - cat( - paste0(' samplesfile="', out$swdPath, '"'), - file = out$batchPath, - append = T - ) - cat( - paste0(' environmentallayers="', out$backgroundPath, '"'), - file = out$batchPath, - append = T - ) - # test data (if provided) - if (!is.null(out$testDataPath)) { - cat( - paste0(' testsamplesfile="', out$testDataPath, '"'), - file = out$batchPath, - append = T - ) - } - # factor (categorical) layers - if (length(out$factorInputVars) > 0) { - for (i in out$factorInputVars) { - cat(paste0(" togglelayertype=", i), file = out$batchPath, append = T) - } - } - - # output directory - cat( - paste0( - ' outputdirectory="', - file.path(out$tempDir, "Outputs", fsep = "\\"), - '"' - ), - file = out$batchPath, - append = T - ) - # performance settings - cat( - " threads=", - out$modOptions$MultiprocessingThreads, - sep = "", - file = out$batchPath, - append = T - ) - # model complexity settings - cat( - " autofeature=", - tolower(out$modOptions$AutoFeatureSelection), - sep = "", - file = out$batchPath, - append = TRUE - ) - cat( - " betamultiplier=", - out$modOptions$RegularizationMultiplier, - sep = "", - file = out$batchPath, - append = TRUE + runMaxent( + out = out, + samplesfile = out$swdPath, + envlayers = out$backgroundPath, + outputdir = file.path(out$tempDir, "Outputs"), + testsamplesfile = out$testDataPath, + fullFit = TRUE ) - cat( - " doclamp=", - tolower(out$modOptions$EnableClamping), - sep = "", - file = out$batchPath, - append = TRUE - ) - - # Explicit feature toggles (if auto feature selection is off) - if (!out$modOptions$AutoFeatureSelection) { - cat( - paste0( - " linear=", - tolower(out$modOptions$UseLinear), - " quadratic=", - tolower(out$modOptions$UseQuadratic), - " product=", - tolower(out$modOptions$UseProduct), - " hinge=", - tolower(out$modOptions$UseHinge), - " threshold=", - tolower(out$modOptions$UseThreshold) - ), - file = out$batchPath, - append = TRUE - ) - } - - # output and diagnostics - cat( - " responsecurves jackknife writeclampgrid writemess warnings prefixes", - file = out$batchPath, - append = T - ) - - # execution controls - cat(" redoifexists autorun", file = out$batchPath, append = T) - - # Note than maxent can't handle spaces in the batch file path - # - if there are spaces in tempDir, copy the batch file to a system temp file - # - also update batchPath location in the local scope - if (str_detect(out$tempDir, " ")) { - batchTempFile <- tempfile(pattern = "runMaxent", fileext = ".bat") - file.copy(out$batchPath, batchTempFile, overwrite = T) - out$batchPath <- batchTempFile - } - # run maxent - shell(out$batchPath) - - # read lambdas output modelMaxent <- read.maxent(file.path( out$tempDir, "Outputs", - "species.lambdas", - fsep = "\\" + "species.lambdas" )) } else { - # prepare batch file - capture.output( - cat("java -mx", out$modOptions$MemoryLimit, "m", sep = ""), - file = out$batchPath - ) - cat( - " -jar", - paste0( - '"', - file.path( - ssimEnvironment()$PackageDirectory, - "maxent.jar", - fsep = "\\" - ), - '"' - ), - file = out$batchPath, - append = T - ) - if (!out$modOptions$VisibleInterface) { - cat(" -z", file = out$batchPath, append = T) - } - cat( - paste0( - ' samplesfile="', - file.path(out$tempDir, "CVsplits", "training-swd.csv", fsep = "\\"), - '"' - ), - file = out$batchPath, - append = T - ) - cat( - paste0( - ' environmentallayers="', - file.path(out$tempDir, "CVsplits", "background-swd.csv", fsep = "\\"), - '"' - ), - file = out$batchPath, - append = T - ) - if (length(out$factorInputVars) > 0) { - for (i in out$factorInputVars) { - cat(paste0(" togglelayertype=", i), file = out$batchPath, append = T) - } - } - cat( - paste0( - ' outputdirectory="', - file.path(out$tempDir, "CVsplits", fsep = "\\"), - '"' - ), - file = out$batchPath, - append = T - ) - cat( - " threads=", - out$modOptions$MultiprocessingThreads, - sep = "", - file = out$batchPath, - append = T + runMaxent( + out = out, + samplesfile = file.path(out$tempDir, "CVsplits", "training-swd.csv"), + envlayers = file.path(out$tempDir, "CVsplits", "background-swd.csv"), + outputdir = file.path(out$tempDir, "CVsplits"), + fullFit = FALSE ) - - # model complexity settings - cat( - " autofeature=", - tolower(out$modOptions$AutoFeatureSelection), - sep = "", - file = out$batchPath, - append = TRUE - ) - cat( - " betamultiplier=", - out$modOptions$RegularizationMultiplier, - sep = "", - file = out$batchPath, - append = TRUE - ) - cat( - " doclamp=", - tolower(out$modOptions$EnableClamping), - sep = "", - file = out$batchPath, - append = TRUE - ) - - # Explicit feature toggles (if auto feature selection is off) - if (!out$modOptions$AutoFeatureSelection) { - cat( - paste0( - " linear=", - tolower(out$modOptions$UseLinear), - " quadratic=", - tolower(out$modOptions$UseQuadratic), - " product=", - tolower(out$modOptions$UseProduct), - " hinge=", - tolower(out$modOptions$UseHinge), - " threshold=", - tolower(out$modOptions$UseThreshold) - ), - file = out$batchPath, - append = TRUE - ) - } - - cat( - " writeclampgrid writemess warnings prefixes", - file = out$batchPath, - append = T - ) # reverse these default settings - cat(" redoifexists autorun", file = out$batchPath, append = T) - - # Note than maxent can't handle spaces in the batch file path - # - if there are spaces in tempDir, copy the batch file to a system temp file - # - also update batchPath location in the local scope - if (str_detect(out$tempDir, " ")) { - batchTempFile <- tempfile(pattern = "runMaxent", fileext = ".bat") - file.copy(out$batchPath, batchTempFile, overwrite = T) - out$batchPath <- batchTempFile - } - - # run maxent - shell(out$batchPath) - - # read lambdas output modelMaxent <- read.maxent(file.path( out$tempDir, "CVsplits", - "species.lambdas", - fsep = "\\" + "species.lambdas" )) } return(modelMaxent) @@ -581,7 +321,7 @@ fitModel <- function( #================================================================ # BRT - #================================================================= + #================================================================ if (out$modType == "brt") { # set n-folds if (out$validationOptions$CrossValidate) { @@ -646,7 +386,7 @@ fitModel <- function( #================================================================ # GAM - #================================================================= + #================================================================ if (out$modType == "gam") { # calculating the case weights @@ -774,22 +514,98 @@ fitModel <- function( } } -### check java installation ---------------------------------------------------- -# function to check if java is installed and available on system path +## Model Fitting Helper Functions ---------------------------------------------- + +### Check Java Installation ---------------------------------------------------- +# SyncroSim 3.1.28+ sets a minimal PATH (System32 + conda dirs only) to prevent +# competing GDAL/GEOS/PROJ DLL conflicts, which can strip Java from PATH. If +# java is not found on PATH, fall back to JAVA_HOME and patch the R session +# PATH so all subsequent calls (including system2() for MaxEnt) work. checkJava <- function() { os <- Sys.info()[["sysname"]] - # set system call - cmd <- "java -version" + if (nchar(Sys.which("java")) == 0) { + javaBinDir <- NULL + + # fallback 1: JAVA_HOME environment variable + javaHome <- Sys.getenv("JAVA_HOME") + if (nchar(javaHome) > 0) { + javaBin <- file.path( + javaHome, + "bin", + if (os == "Windows") "java.exe" else "java" + ) + if (file.exists(javaBin)) javaBinDir <- file.path(javaHome, "bin") + } + + # fallback 2: search common installation directories + if (is.null(javaBinDir)) { + if (os == "Windows") { + commonRoots <- Filter( + nchar, + c(Sys.getenv("ProgramFiles"), Sys.getenv("ProgramFiles(x86)")) + ) + commonVendors <- c( + "Java", + "Eclipse Adoptium", + "Microsoft", + "BellSoft", + "Zulu", + "Amazon Corretto" + ) + for (root in commonRoots) { + for (vendor in commonVendors) { + for (jdk in list.dirs(file.path(root, vendor), recursive = FALSE)) { + if (file.exists(file.path(jdk, "bin", "java.exe"))) { + javaBinDir <- file.path(jdk, "bin") + break + } + } + if (!is.null(javaBinDir)) break + } + if (!is.null(javaBinDir)) break + } + } else { + for (d in c( + "/usr/bin", + "/usr/local/bin", + "/usr/lib/jvm/default/bin", + "/usr/lib/jvm/java/bin" + )) { + if (file.exists(file.path(d, "java"))) { + javaBinDir <- d + break + } + } + } + } + + if (!is.null(javaBinDir)) { + javaBinDir <- normalizePath(javaBinDir, winslash = "/", mustWork = FALSE) + Sys.setenv( + PATH = paste(javaBinDir, Sys.getenv("PATH"), sep = .Platform$path.sep) + ) + updateRunLog(paste0( + "\nJava found at '", + javaBinDir, + "' and added to session PATH." + )) + } + } # run the command and capture exit code status <- tryCatch( { if (os == "Windows") { - shell(cmd, intern = FALSE, ignore.stdout = TRUE, ignore.stderr = TRUE) + shell( + "java -version", + intern = FALSE, + ignore.stdout = TRUE, + ignore.stderr = TRUE + ) } else { - system(cmd, ignore.stdout = TRUE, ignore.stderr = TRUE) + system("java -version", ignore.stdout = TRUE, ignore.stderr = TRUE) } }, error = function(e) 1L @@ -804,6 +620,80 @@ checkJava <- function() { FALSE } } + +### Run Maxent ----------------------------------------------------------------- +# Invokes MaxEnt directly via system2("java") — cross-platform, no batch file. +# system2() handles path quoting so spaces and special characters in paths work. + +runMaxent <- function( + out, + samplesfile, + envlayers, + outputdir, + testsamplesfile = NULL, + fullFit = TRUE +) { + jarPath <- file.path(ssimEnvironment()$PackageDirectory, "maxent.jar") + + args <- c(paste0("-mx", out$modOptions$MemoryLimit, "m"), "-jar", jarPath) + + if (!out$modOptions$VisibleInterface) { + args <- c(args, "-z") + } + + args <- c( + args, + paste0("samplesfile=", samplesfile), + paste0("environmentallayers=", envlayers) + ) + + if (!is.null(testsamplesfile)) { + args <- c(args, paste0("testsamplesfile=", testsamplesfile)) + } + + if (length(out$factorInputVars) > 0) { + args <- c(args, paste0("togglelayertype=", out$factorInputVars)) + } + + args <- c( + args, + paste0("outputdirectory=", outputdir), + paste0("threads=", out$modOptions$MultiprocessingThreads), + paste0("autofeature=", tolower(out$modOptions$AutoFeatureSelection)), + paste0("betamultiplier=", out$modOptions$RegularizationMultiplier), + paste0("doclamp=", tolower(out$modOptions$EnableClamping)) + ) + + if (!out$modOptions$AutoFeatureSelection) { + args <- c( + args, + paste0("linear=", tolower(out$modOptions$UseLinear)), + paste0("quadratic=", tolower(out$modOptions$UseQuadratic)), + paste0("product=", tolower(out$modOptions$UseProduct)), + paste0("hinge=", tolower(out$modOptions$UseHinge)), + paste0("threshold=", tolower(out$modOptions$UseThreshold)) + ) + } + + if (fullFit) { + args <- c( + args, + "responsecurves", + "jackknife", + "writeclampgrid", + "writemess", + "warnings", + "prefixes" + ) + } else { + args <- c(args, "writeclampgrid", "writemess", "warnings", "prefixes") + } + + args <- c(args, "redoifexists", "autorun") + + system2("java", args = args) +} + ### Read Maxent ---------------------------------------------------------------- # function to read in maxent lambdas file and extract coefficients for each feature type @@ -1003,7 +893,7 @@ est.lr <- function(dat, out) { } } -# MODEL SELECTION AND VALIDATION FUNCTIONS ------------------------------------- +# Model Selection and Validation Functions ------------------------------------- ## Run Cross Validation -------------------------------------------------------- @@ -1124,7 +1014,9 @@ cv.fct <- function( if (is.null(cv.final.mod)) { stop(paste0( - "CV fold ", i, " model fitting failed. ", + "CV fold ", + i, + " model fitting failed. ", "Consider removing or reclassifying rare factor levels, reducing the number of CV folds, ", "or reviewing the data for outliers or class imbalance." )) @@ -1166,7 +1058,10 @@ cv.fct <- function( valid_i <- !is.na(u_i) if (any(!valid_i)) { updateRunLog(paste0( - "\nWarning: ", sum(!valid_i), " site(s) in CV fold ", i, + "\nWarning: ", + sum(!valid_i), + " site(s) in CV fold ", + i, " could not be predicted and will be excluded from fold evaluation.", " This is likely caused by a factor level absent from this fold's training data.\n" )) @@ -1177,7 +1072,9 @@ cv.fct <- function( if (all(is.na(u_i))) { stop(paste0( - "CV fold ", i, " produced no valid predictions. This is likely caused by a categorical ", + "CV fold ", + i, + " produced no valid predictions. This is likely caused by a categorical ", "variable with a factor level that is absent from this fold's training data. ", "Consider removing or reclassifying rare factor levels, or reducing the number of CV folds." )) @@ -1186,7 +1083,9 @@ cv.fct <- function( if (family == "binomial" | family == "bernoulli") { if (length(unique(y_i)) < 2) { stop(paste0( - "CV fold ", i, " contains only one response class after excluding unpredictable sites. ", + "CV fold ", + i, + " contains only one response class after excluding unpredictable sites. ", "Consider removing or reclassifying rare factor levels, or reducing the number of CV folds." )) } @@ -1263,7 +1162,7 @@ cv.fct <- function( } -### Calculate Deviance function -----------[see helper functions]--------------- +## Model Evaluation Helper Functions ------------------------------------------- ### ROC Function --------------------------------------------------------------- @@ -1300,7 +1199,7 @@ roc <- function( return(round(wilc, 4)) } -### Calibration function ------------------------------------------------------- +### Calibration Function ------------------------------------------------------- calibration <- function( obs, # observed response @@ -1343,7 +1242,7 @@ calibration <- function( return(calibration.result) } -### Permute Predict function --------------------------------------------------- +### Permute Predict Function --------------------------------------------------- permute.predict <- function( inputVars, # input variables for model fitting @@ -1372,7 +1271,7 @@ permute.predict <- function( return(AUC) } -# MODEL OUTPUT FUNCTIONS ------------------------------------------------------- +# Model Output Functions ------------------------------------------------------- ## Make Model Evaluation Plots ------------------------------------------------- @@ -2070,7 +1969,7 @@ makeModelEvalPlots <- function(out = out) { return(out) } -### Calculate statistics function ---------------------------------------------- +### Calculate Statistics Function ---------------------------------------------- calcStat <- function( x, # x <- out$data[[i]] @@ -2209,7 +2108,7 @@ calcStat <- function( } } -### Variable Importance function ----------------------------------------------- +### Variable Importance Function ----------------------------------------------- VariableImportance <- function( out, # out list @@ -2494,7 +2393,7 @@ VariableImportance <- function( title(ylab = "Variables", line = 14, cex.lab = 3, font.lab = 2) } -### Confusion Matrix function -------------------------------------------------- +### Confusion Matrix Function -------------------------------------------------- confusion.matrix <- function( Stats, # output from calcStat function @@ -2646,7 +2545,7 @@ confusion.matrix <- function( ) } -### Residual Image function ---------------------------------------------------- +### Residual Image Function ---------------------------------------------------- resid.image <- function(dev.contrib, dat, file.name, label, create.image = T) { #produces a map of deviance residuals unless we're using independent evaluation data in which case @@ -2807,7 +2706,7 @@ beachcolours <- function( } -### Test/Train ROC Plot function ----------------------------------------------- +### Test/Train ROC Plot Function ----------------------------------------------- TestTrainRocPlot <- function( dat, # Stats$train$auc.data @@ -3352,7 +3251,7 @@ TestTrainRocPlot <- function( par(op) } -### Presence-Only Smoothed Calibration Plot function --------------------------- +### Presence-Only Smoothed Calibration Plot Function --------------------------- pocplot <- function(pred, back, linearize = TRUE, ...) { ispresence <- c(rep(1, length(pred)), rep(0, length(back))) @@ -3380,7 +3279,7 @@ pocplot <- function(pred, back, linearize = TRUE, ...) { predd } -### Presence-Absence Smoothed Calibration Plot function ------------------------ +### Presence-Absence Smoothed Calibration Plot Function ------------------------ pacplot <- function(pred, pa, ...) { predd <- smoothdist(preds = pred, obs = pa) @@ -3394,7 +3293,7 @@ pacplot <- function(pred, pa, ...) { ) } -#### Plotting function for Calibration plots [nested in pocplot/pacplot] ------- +#### Plotting Function for Calibration Plots [nested in pocplot/pacplot] ------- calibplot <- function( pred, @@ -3443,7 +3342,7 @@ calibplot <- function( } } -#### Smoothing function for Calibration plots [nested in pocplot/pacplot] ------ +#### Smoothing Function for Calibration Plots [nested in pocplot/pacplot] ------ smoothingdf <- 6 smoothdist <- function(preds, obs) { @@ -3484,7 +3383,7 @@ smoothdist <- function(preds, obs) { data.frame(x = x, y = y$fit, se = y$se.fit) } -### Capture Statistics function ------------------------------------------------ +### Capture Statistics Function ------------------------------------------------ capture.stats <- function( Stats.lst, # stats or lst output from calcStat function @@ -3904,7 +3803,7 @@ capture.stats <- function( } } -## response curves function ----------------------------------------------------- +## Response Curves Function ----------------------------------------------------- response.curves <- function(out) { # Desanitize output variable names diff --git a/src/8-fit-maxent.R b/src/8-fit-maxent.R index 9b1ef8b..dfef218 100644 --- a/src/8-fit-maxent.R +++ b/src/8-fit-maxent.R @@ -305,12 +305,7 @@ dir.create(file.path(ssimTempDir, "Outputs")) ## training data -out$swdPath <- swdPath <- file.path( - ssimTempDir, - "Inputs", - "training-swd.csv", - fsep = "\\" -) +out$swdPath <- swdPath <- file.path(ssimTempDir, "Inputs", "training-swd.csv") trainingData %>% mutate(Species = case_when(Response == 1 ~ "species")) %>% @@ -328,12 +323,7 @@ trainingData %>% out$data$train <- trainingData if (pseudoAbs) { - out$backgroundPath <- backgroundPath <- file.path( - ssimTempDir, - "Inputs", - "background-swd.csv", - fsep = "\\" - ) + out$backgroundPath <- backgroundPath <- file.path(ssimTempDir, "Inputs", "background-swd.csv") trainingData %>% mutate(Species = case_when(Response != 1 ~ "background")) %>% @@ -352,12 +342,7 @@ if (pseudoAbs) { ## testing data if (!is.null(testingData)) { - out$testDataPath <- testDataPath <- file.path( - ssimTempDir, - "Inputs", - "testing-swd.csv", - fsep = "\\" - ) + out$testDataPath <- testDataPath <- file.path(ssimTempDir, "Inputs", "testing-swd.csv") testingData %>% mutate(Species = case_when(Response == 1 ~ "species")) %>% drop_na(Species) %>% @@ -376,7 +361,6 @@ if (!is.null(testingData)) { gc() } -out$batchPath <- file.path(ssimTempDir, "Inputs", "runMaxent.bat", fsep = "\\") # out$maxJobs <- mulitprocessingSheet$MaximumJobs # Create output text file ------------------------------------------------------ From 526d4034676de89bc173111033d0d747901150de Mon Sep 17 00:00:00 2001 From: spearmangillman Date: Thu, 2 Apr 2026 09:40:08 -0300 Subject: [PATCH 02/18] increment to v2.5.1 --- src/package.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/package.xml b/src/package.xml index c15a41b..626594b 100644 --- a/src/package.xml +++ b/src/package.xml @@ -1,5 +1,5 @@ - + From 022ca843391ad6153486287fcf585453b333b586 Mon Sep 17 00:00:00 2001 From: spearmangillman Date: Thu, 9 Apr 2026 15:19:21 -0300 Subject: [PATCH 03/18] Conda environment split into r and py envs Split the single shared conda environment into separate Python and R environments to allow independent dependency management and avoid conflicts between the two runtimes. --- src/package.xml | 56 +++++++++++++++++++++--------------------- src/wisdm-conda.yml | 37 ---------------------------- src/wisdm-py-conda.yml | 12 +++++++++ src/wisdm-r-conda.yml | 38 ++++++++++++++++++++++++++++ 4 files changed, 78 insertions(+), 65 deletions(-) delete mode 100644 src/wisdm-conda.yml create mode 100644 src/wisdm-py-conda.yml create mode 100644 src/wisdm-r-conda.yml diff --git a/src/package.xml b/src/package.xml index 626594b..24cc6f6 100644 --- a/src/package.xml +++ b/src/package.xml @@ -253,8 +253,8 @@ displayName="1 - Prepare Multiprocessing" programArguments="1-prep-multiprocessing.py" isMultiprocessing="False" - condaEnv="wisdm-conda.yml" - condaEnvVersion="5"> + condaEnv="wisdm-py-conda.yml" + condaEnvVersion="6"> @@ -265,8 +265,8 @@ displayName="2 - Spatial Data Preparation" programArguments="2-spatial-data-preparation.py" isMultiprocessing="False" - condaEnv="wisdm-conda.yml" - condaEnvVersion="5"> + condaEnv="wisdm-py-conda.yml" + condaEnvVersion="6"> @@ -278,8 +278,8 @@ displayName="3 - Site Data Preparation" programArguments="3-site-data-preparation.py" isMultiprocessing="False" - condaEnv="wisdm-conda.yml" - condaEnvVersion="5"> + condaEnv="wisdm-py-conda.yml" + condaEnvVersion="6"> @@ -293,8 +293,8 @@ displayName="4 - Background Data Generation" programArguments="4-background-data-generation.R" isMultiprocessing="False" - condaEnv="wisdm-conda.yml" - condaEnvVersion="5"> + condaEnv="wisdm-r-conda.yml" + condaEnvVersion="6"> @@ -308,8 +308,8 @@ displayName="5 - Prepare Training/Testing Data" programArguments="5-prepare-training-testing-data.R" isMultiprocessing="False" - condaEnv="wisdm-conda.yml" - condaEnvVersion="5"> + condaEnv="wisdm-r-conda.yml" + condaEnvVersion="6"> @@ -321,8 +321,8 @@ displayName ="6 - Variable Reduction" programArguments="6-variable-reduction.R" isMultiprocessing="False" - condaEnv="wisdm-conda.yml" - condaEnvVersion="5"> + condaEnv="wisdm-r-conda.yml" + condaEnvVersion="6"> @@ -336,8 +336,8 @@ displayName="7 - Hyperparameter Tuning" programArguments="7-hyperparameter-tuning.R" isMultiprocessing="False" - condaEnv="wisdm-conda.yml" - condaEnvVersion="5"> + condaEnv="wisdm-r-conda.yml" + condaEnvVersion="6"> @@ -357,8 +357,8 @@ displayName="8 - Generalized Linear Model" programArguments="8-fit-glm.R" isMultiprocessing="False" - condaEnv="wisdm-conda.yml" - condaEnvVersion="5"> + condaEnv="wisdm-r-conda.yml" + condaEnvVersion="6"> @@ -373,8 +373,8 @@ displayName="8 - Generalized Additive Model" programArguments="8-fit-gam.R" isMultiprocessing="False" - condaEnv="wisdm-conda.yml" - condaEnvVersion="5"> + condaEnv="wisdm-r-conda.yml" + condaEnvVersion="6"> @@ -389,8 +389,8 @@ displayName="8 - Random Forest" programArguments="8-fit-rf.R" isMultiprocessing="False" - condaEnv="wisdm-conda.yml" - condaEnvVersion="5"> + condaEnv="wisdm-r-conda.yml" + condaEnvVersion="6"> @@ -405,8 +405,8 @@ displayName="8 - Maxent" programArguments="8-fit-maxent.R" isMultiprocessing="False" - condaEnv="wisdm-conda.yml" - condaEnvVersion="5"> + condaEnv="wisdm-r-conda.yml" + condaEnvVersion="6"> @@ -422,8 +422,8 @@ displayName="8 - Boosted Regression Trees" programArguments="8-fit-brt.R" isMultiprocessing="False" - condaEnv="wisdm-conda.yml" - condaEnvVersion="5"> + condaEnv="wisdm-r-conda.yml" + condaEnvVersion="6"> @@ -438,8 +438,8 @@ name="ApplyModel" displayName="9 - Apply Model" programArguments="9-apply-model.R" - condaEnv="wisdm-conda.yml" - condaEnvVersion="5"> + condaEnv="wisdm-r-conda.yml" + condaEnvVersion="6"> @@ -454,8 +454,8 @@ displayName="10 - Ensemble Model" programArguments="10-ensemble-model.py" isMultiprocessing="False" - condaEnv="wisdm-conda.yml" - condaEnvVersion="5"> + condaEnv="wisdm-py-conda.yml" + condaEnvVersion="6"> diff --git a/src/wisdm-conda.yml b/src/wisdm-conda.yml deleted file mode 100644 index 30c6956..0000000 --- a/src/wisdm-conda.yml +++ /dev/null @@ -1,37 +0,0 @@ -name: wisdm-conda -channels: - - conda-forge -dependencies: - - r-base=4.1.3 - - r-adehabitathr=0.4.21 - - r-data.table=1.15.4 - - r-dismo=1.3_5 - - r-dplyr=1.1.4 - - r-gbm=2.1.9 - - r-ggplot2=3.4.2 - - r-glmnet=4.1_2 - - r-gridextra=2.3 - - r-mgcv=1.9_1 - - r-pander=0.6.5 - - r-png=0.1_8 - - r-presenceabsence=1.1.11 - - r-prroc=1.3.1 - - r-randomforest=4.7_1.1 - - r-rocr=1.0_11 - - r-rsyncrosim=2.1.3 - - r-sf=1.0_7 - - r-shiny=1.7.4 - - r-sp=2.1_4 - - r-spatstat.geom=3.2_9 - - r-terra=1.5_21 - - r-tidyr=1.3.1 - - r-tidyverse=2.0.0 - - r-xml2=1.3.3 - - r-zip=2.3.1 - - python=3.12.4 - - dask=2024.7.0 - - geopandas=1.0.1 - - pysyncrosim=2.1.0 - - pywin32=306 - - rasterio=1.3.10 - - rioxarray=0.16.0 diff --git a/src/wisdm-py-conda.yml b/src/wisdm-py-conda.yml new file mode 100644 index 0000000..b8b8382 --- /dev/null +++ b/src/wisdm-py-conda.yml @@ -0,0 +1,12 @@ +name: wisdm-py-conda +channels: + - conda-forge +dependencies: + # Python — no R spatial stack in this env so there are no GDAL/GEOS/PROJ + # cross-language constraints. Packages are pinned to tested versions. + - python=3.12.4 + - dask=2024.7.0 + - geopandas=1.0.1 + - pysyncrosim=2.2.0 + - rasterio=1.3.10 + - rioxarray=0.16.0 diff --git a/src/wisdm-r-conda.yml b/src/wisdm-r-conda.yml new file mode 100644 index 0000000..e2a47df --- /dev/null +++ b/src/wisdm-r-conda.yml @@ -0,0 +1,38 @@ +name: wisdm-r-conda +channels: + - conda-forge +dependencies: + # R base — pinned to 4.1.x; r-rsyncrosim only has r41 builds on Windows, + # and r-terra/r-dismo/r-sf require the legacy msys2 (m2w64) toolchain on + # Windows which is incompatible with newer compiled R packages. Update when + # r-rsyncrosim and r-terra publish cross-platform builds for a newer R version. + - r-base=4.1.3 + # R packages — all versions confirmed to have r41 builds on both linux-64 and + # win-64. Some versions differ from the original Windows-only pins because the + # newer Windows versions (e.g. r-data.table=1.15.4, r-sp=2.1_4) have no r41 + # linux-64 build; these are pinned to the latest version that does. + - r-adehabitathr=0.4.21 + - r-data.table=1.14.8 + - r-dismo=1.3_5 + - r-dplyr=1.1.2 + - r-gbm=2.1.8.1 + - r-ggplot2=3.4.2 + - r-glmnet=4.1_2 + - r-gridextra=2.3 + - r-mgcv=1.8_42 + - r-pander=0.6.5 + - r-png=0.1_8 + - r-presenceabsence=1.1.11 + - r-prroc=1.3.1 + - r-randomforest=4.7_1.1 + - r-rocr=1.0_11 + - r-rsyncrosim=2.1.13 + - r-sf=1.0_7 + - r-shiny=1.7.4 + - r-sp=1.6_1 + - r-spatstat.geom=3.2_1 + - r-terra=1.5_21 + - r-tidyr=1.3.0 + - r-tidyverse=2.0.0 + - r-xml2=1.3.3 + - r-zip=2.3.0 From 707d218c081d25f0d4deb42e9f07abd3e9e78a75 Mon Sep 17 00:00:00 2001 From: spearmangillman Date: Thu, 9 Apr 2026 15:27:10 -0300 Subject: [PATCH 04/18] add handeling for headless plot rendering updated 00-constants.R to set bitmapType = "cairo" on Unix when Cairo is available, removing the X11 display dependency for PNG output in R transformers. --- src/00-constants.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/00-constants.R b/src/00-constants.R index 0a5f3f8..5680b09 100644 --- a/src/00-constants.R +++ b/src/00-constants.R @@ -3,6 +3,11 @@ ## ApexRMS, October 2025 ## ------------------------- +# Use Cairo backend for PNG rendering on Linux to avoid X11 display dependency +if (.Platform$OS.type == "unix" && capabilities("cairo")) { + options(bitmapType = "cairo") +} + # Raster nodata sentinel value — must match nodataValue in setup_functions.py nodataValue <- -9999 From a32baa71c8d36c1f0472140c23df602c0f9b5e83 Mon Sep 17 00:00:00 2001 From: spearmangillman Date: Thu, 9 Apr 2026 15:29:06 -0300 Subject: [PATCH 05/18] Cross-platform path fixes --- src/04-background-data-functions.R | 3 ++- src/10-ensemble-model.py | 2 +- src/8-fit-glm.R | 2 +- src/8-fit-maxent.R | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/04-background-data-functions.R b/src/04-background-data-functions.R index 1e296ef..8e5ba95 100644 --- a/src/04-background-data-functions.R +++ b/src/04-background-data-functions.R @@ -26,7 +26,8 @@ backgroundSurfaceGeneration <- function(sp, # species if ('kde' %in% method) { if(tolower(method$surface) == "continuous"){ - kde_bg_out <- gsub('/', '\\\\', paste0(outputDir, '/', sp, '_kde_bg_surface.tif')) + kde_bg_out <- paste0(outputDir, '/', sp, '_kde_bg_surface.tif') + if (.Platform$OS.type == "windows") kde_bg_out <- gsub('/', '\\\\', kde_bg_out) kde.mat <- matrix(m, nrow = length(unique(ud@coords[, 1]))) t <- terra::rast(raster::raster(ud)) diff --git a/src/10-ensemble-model.py b/src/10-ensemble-model.py index f3878fe..1eba855 100644 --- a/src/10-ensemble-model.py +++ b/src/10-ensemble-model.py @@ -54,7 +54,7 @@ def run(): # Get path to scenario inputs ssimInputDir = myScenario.library.location + \ - ".input\\Scenario-" + str(myScenario.sid) + ".input/Scenario-" + str(myScenario.sid) # Load datasheets # inputs diff --git a/src/8-fit-glm.R b/src/8-fit-glm.R index dfced59..061f552 100644 --- a/src/8-fit-glm.R +++ b/src/8-fit-glm.R @@ -261,7 +261,7 @@ progressBar() finalMod <- fitModel(dat = trainingData, out = out) # save model to temp storage -# saveRDS(finalMod, file = paste0(ssimTempDir,"\\Data\\", modType, "_model.rds")) +# saveRDS(finalMod, file = file.path(ssimTempDir, "Data", paste0(modType, "_model.rds"))) # add relevant model details to out out$finalMod <- finalMod diff --git a/src/8-fit-maxent.R b/src/8-fit-maxent.R index dfef218..f7e9b4c 100644 --- a/src/8-fit-maxent.R +++ b/src/8-fit-maxent.R @@ -400,7 +400,7 @@ finalMod <- fitModel( # finalMod$trainingData <- trainingData # save model to temp storage -# saveRDS(finalMod, file = paste0(ssimTempDir,"\\Data\\", modType, "_model.rds")) +# saveRDS(finalMod, file = file.path(ssimTempDir, "Data", paste0(modType, "_model.rds"))) # finalMod$trainingData <- NULL # add relevant model details to out From 8e1c561ccf4a408a04ff861cfb7ee851dacdb669 Mon Sep 17 00:00:00 2001 From: spearmangillman Date: Thu, 9 Apr 2026 15:31:19 -0300 Subject: [PATCH 06/18] add explicit raster block size Explicitly set blockxsize=256, blockysize=256 when writing tiled template rasters to ensure valid GeoTIFF tiling on Linux. --- src/2-spatial-data-preparation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/2-spatial-data-preparation.py b/src/2-spatial-data-preparation.py index b71ad30..aa7a6f7 100644 --- a/src/2-spatial-data-preparation.py +++ b/src/2-spatial-data-preparation.py @@ -180,7 +180,7 @@ def check_raster_range(filepath, min_allowed=0.0, max_allowed=1.0, epsilon=1e-8) tiledTemplatePath = os.path.join( ssimTempDir, base + "_tiled" + ext) profile = src.profile.copy() - profile.update(tiled=True, compress=rasterCompression) + profile.update(tiled=True, compress=rasterCompression, blockxsize=256, blockysize=256) with rasterio.open(tiledTemplatePath, 'w', **profile) as dst: for _, window in src.block_windows(1): dst.write(src.read(window=window), window=window) From 5fce583b5a4d0922a4a78eca9b3042c9fd2b7153 Mon Sep 17 00:00:00 2001 From: spearmangillman Date: Thu, 9 Apr 2026 15:35:22 -0300 Subject: [PATCH 07/18] add headless Shiny app support - Updated launchShinyApp to detect headless Unix environments (no DISPLAY) and bind to 0.0.0.0:3838 with launch.browser = FALSE, making the interactive viewers accessible via port forwarding or SSH tunnel. - Added an appName parameter so the live status message identifies the accessible app --- src/00-helper-functions.R | 30 +++++++++++++++++++++++++++--- src/6-variable-reduction.R | 3 +-- src/7-hyperparameter-tuning.R | 2 +- 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/src/00-helper-functions.R b/src/00-helper-functions.R index b99d635..9a31136 100644 --- a/src/00-helper-functions.R +++ b/src/00-helper-functions.R @@ -555,7 +555,7 @@ safe_rbind <- function(df, row) { # Launch a Shiny app with browser detection ------------------------------------ -launchShinyApp <- function(appPath) { +launchShinyApp <- function(appPath, appName = "Viewer") { # Search PATH first — works on all platforms (Linux, macOS, Windows) chrome.names <- c( "google-chrome", @@ -617,8 +617,32 @@ launchShinyApp <- function(appPath) { } } - # Final fallback: let Shiny use the OS default (xdg-open / open / shell.exec) - if (is.null(browser.path)) { + # On headless Unix (Docker or bare Linux server with no DISPLAY), bind to + # 0.0.0.0:3838 so the app is reachable via port forwarding from the host. + # A browser is still launched if one is found (e.g. via X11 forwarding). + # Chrome/Firefox require --no-sandbox when running as root. + # On non-headless Unix or Windows, behaviour is unchanged. + headless_unix <- .Platform$OS.type == "unix" && + nchar(Sys.getenv("DISPLAY")) == 0 + + if (headless_unix) { + port <- 3838 + progressBar( + type = "message", + message = paste0( + "ACTION REQUIRED: Open http://localhost:", + port, + " in your browser to use the Interactive ", + appName + ) + ) + shiny::runApp( + appDir = appPath, + host = "0.0.0.0", + port = port, + launch.browser = FALSE + ) + } else if (is.null(browser.path)) { shiny::runApp(appDir = appPath, launch.browser = TRUE) } else { shiny::runApp(appDir = appPath, launch.browser = function(shinyurl) { diff --git a/src/6-variable-reduction.R b/src/6-variable-reduction.R index abe4061..5174383 100644 --- a/src/6-variable-reduction.R +++ b/src/6-variable-reduction.R @@ -253,7 +253,6 @@ if ( names(covsDE) <- devInfo$covDE # run pairs explore with all variables ----------------------------------------- - options <- covariateSelectionSheet options$NumberOfPlots <- ncol(select( siteData, @@ -278,7 +277,7 @@ if ( # covsDE options <- covariateSelectionSheet - launchShinyApp(file.path(packageDir, "06-covariate-correlation-app.R")) + launchShinyApp(file.path(packageDir, "06-covariate-correlation-app.R"), appName = "Correlation Viewer") # save image files covariateCorrelationSheet <- safe_rbind( diff --git a/src/7-hyperparameter-tuning.R b/src/7-hyperparameter-tuning.R index 9e3ad42..bd32099 100644 --- a/src/7-hyperparameter-tuning.R +++ b/src/7-hyperparameter-tuning.R @@ -490,7 +490,7 @@ combineTxtFiles(filePaths = txtFilePaths, outputPath = file.path(ssimTempDir, "O # Shiny App -------------------------------------------------------------------- -launchShinyApp(file.path(packageDir, "07-hyperparameter-tuning-app.R")) +launchShinyApp(file.path(packageDir, "07-hyperparameter-tuning-app.R"), appName = "Hyperparameter Tuning App") selectedComboOutputs <- comboImgs[comboImgs$displayName == comboOut,] progressBar() From 14137c8362d4b88fda5ba84dac225be9ba014a4b Mon Sep 17 00:00:00 2001 From: spearmangillman Date: Thu, 9 Apr 2026 15:40:28 -0300 Subject: [PATCH 08/18] index updates --- docs/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/index.md b/docs/index.md index 2c43665..4bcd84c 100644 --- a/docs/index.md +++ b/docs/index.md @@ -15,13 +15,13 @@ permalink: / ### WISDM is an open-source SyncroSim package for developing and visualizing species distribution models.
-WISDM was designed to update and replace VisTrails SAHM, a software application originally developed in 2013 by the U.S. Geological Survey (Morisette et al. 2013). The WISDM package streamlines your species distribution modeling (SDM) workflows by preparing data, fitting model ensembles, and visualizing model outputs. WISDM maintains records of the various data inputs, processing steps, and modeling options used during SDM construction, and allows users to customize and run existing SDM methods, such as Generalized Linear Models, Random Forest, and Maxent, without having to interact with different software platforms. WISDM also allows users to easily visualize and compare model scenarios from within the SyncroSim user interface. +WISDM was designed to update and replace VisTrails SAHM, a software application originally developed in 2013 by the U.S. Geological Survey (Morisette et al. 2013). The WISDM package streamlines your species distribution modeling (SDM) workflows by preparing data, fitting model ensembles, and visualizing model outputs. WISDM maintains records of the various data inputs, processing steps, and modeling options used during SDM construction, and allows users to customize and run existing SDM methods, such as Boosted Regression Trees, Generalized Additive Models, Generalized Linear Models, Maxent, and Random Forest, without having to interact with different software platforms. WISDM also allows users to easily visualize and compare model scenarios from within the SyncroSim user interface.
## Requirements -This package requires SyncroSim version 2.5.5 or higher.
+This package requires SyncroSim version 3.1.27 or higher.
Java is also required if you choose to run Maxent models within the WISDM package. From 4e7f0b78ce4faa636e02f943a363b3db01235e3e Mon Sep 17 00:00:00 2001 From: Rich Inman Date: Mon, 13 Apr 2026 11:10:14 -0600 Subject: [PATCH 09/18] revised background generation function: improved speed and not use adehabitatHR library --- src/04-background-data-functions.R | 557 ++++++++++++++++++----------- 1 file changed, 346 insertions(+), 211 deletions(-) diff --git a/src/04-background-data-functions.R b/src/04-background-data-functions.R index 1e296ef..a79294f 100644 --- a/src/04-background-data-functions.R +++ b/src/04-background-data-functions.R @@ -4,242 +4,377 @@ ## ------------------------- # Background surface generation function --------------------------------------- - -# library(sp) -# library(adehabitatHR) -# library(spatstat.geom) -# library(sf) - -backgroundSurfaceGeneration <- function(sp, # species - template, # template raster - mask, # spatVector object bounding the study area - outputDir, # output directory - dat, # field data - method) # c('kde', 'mcp') - { # start function - - xy <- sp::SpatialPoints(dat[, c('X', 'Y')]) - ud <- adehabitatHR::kernelUD(xy, extent = 0.5, grid = 150) - mm <- ud@coords[order(ud@coords[, 1]), ] - mm <- order(ud@coords[, 2]) - m <- ud@data$ud[mm] - - if ('kde' %in% method) { - if(tolower(method$surface) == "continuous"){ - kde_bg_out <- gsub('/', '\\\\', paste0(outputDir, '/', sp, '_kde_bg_surface.tif')) - kde.mat <- matrix(m, nrow = length(unique(ud@coords[, 1]))) - - t <- terra::rast(raster::raster(ud)) - ext(t) <- ext(ud) - crs(t) <- crs(template) - t <- t / max(kde.mat) * 100 # normalize from 0-100 - t <- terra::crop(t, ext(template)) - t <- terra::resample(x = t, y = template, method = 'near') - t <- terra::ifel(!is.na(template), t, NA) - - writeRaster(x = t, - filename = kde_bg_out, - gdal = c('BIGTIFF=YES', 'TILED=YES', 'COMPRESS=LZW'), - overwrite = T ) - } - if(tolower(method$surface) == 'binary'){ - ver <- adehabitatHR::getverticeshr(ud, method$isopleth) - bg_out <- paste0(outputDir, '/', sp, '_kde_bg_surface.shp') - } +backgroundSurfacePointGeneration <- function( + sp, # species + n, # number of pseudoabsence points to generate + template, # template raster + outputDir, # output directory + dat, # field data + method) # includes method:('kde', 'mcp'); surface: ('continuous', 'binary'); isopleth +{ # start function + # TWO methods: + # KDE or MCP + # TWO options for KDE: + # continuous or binary (random within isopleth) + # ONE option for MCP: + # binary (random within isopleth) + # if KDE & continous: generate pts based on KDE surface only + # if KDE & binary: generate points randomly within isopleth given + # if MCP: generate points randomly within MCP, MCP is generaged by isopleth given + + ### ### ### ### + ### objects that all methods need + ### ### ### ### + # terra::terraOptions(tempdir=outputDir) + + + ### ### ### ### + ### objects that all methods need + ### ### ### ### + ### sf vector object of observations. will need to set CRS if obs are different + occ_sf <- sf::st_as_sf( + dat[dat$Response==1, c("X", "Y")], + coords = c("X", "Y"), + crs = crs(template), + remove = FALSE + ) + + if ('kde' %in% method) { + ### set outnames + kde_bg_out = gsub('/', '\\\\', paste0(outputDir, '/', sp, '_kde_bg_surface.tif')) + kde_pts_out = paste0(outputDir, '/', sp, '_kde_bg_pts.csv') + bg_pts_out = paste0(outputDir, '/', sp, '_kde_bg_pts.shp') + ### don't waste time generating points if they exist + if(file.exists(kde_pts_out)){ + stop("KDE background points already exist!\nDelete current points to proceed.") } - if("mcp" %in% method){ - ver <- adehabitatHR::mcp(xy, percent = method$isopleth) - bg_out <- paste0(outputDir, '/', sp, '_mcp_bg_surface.shp') - } + ### get cell resolution of template + template_res = terra::res(template)[1] + ### get bounding box of observations and buffer by 10 grid cells + bbox <- sf::st_bbox(c( + xmin = min(dat[,"X"]), + ymin = min(dat[,"Y"]), + xmax = max(dat[,"X"]), + ymax = max(dat[,"Y"])), + crs = crs(template)) + ### get cell width to use for kernel density. cell width determined here is approximately what's needed to create: + # 2,500 (50 x 50) + # 10,000 (100 x 100) + # 1,000,000 (1000 x 1000) + # tiles in bounding box of observations for generating the kernel density estimate. This keeps resolution reasonable, but still fast. + cell_width <- mean(c( + ((bbox$xmax - bbox$xmin) / 50), + ((bbox$ymax - bbox$ymin) / 50) + )) + ### expand bounding box by 1/2 width and length + buffer_width <- mean(c( + ((bbox$xmax - bbox$xmin) / 2), + ((bbox$ymax - bbox$ymin) / 2) + )) + bbox <- bbox |> + sf::st_as_sfc() |> + sf::st_buffer(dist = buffer_width)|> + sf::st_bbox() + ### create owin + win <- spatstat.geom::as.owin(bbox) + ### set window + OBSppp <- spatstat.geom::ppp(dat[,"X"], dat[,"Y"], window=win) + # plot(OBSppp) + ### set bandwidth for KDE. This should be a tunable parameter. sigma, estimated here, is ~ h/2, where h is the parameter estimated by kernealHD with the href method. + bw <- spatstat.explore::bw.ppl(OBSppp) |> round() * 2 # sigma = h/2, where h is estimated from adehabitatHR:kernelUD (original code) + npts <- spatstat.geom::npoints(OBSppp) + ### generate KDE layer + density_ppp <- spatstat.explore::density.ppp( + OBSppp, + sigma=bw, + weights = rep(1 / npts, npts), # ensure that intensity integrates to 1 + eps=cell_width) + r <- terra::rast(density_ppp) + terra::crs(r) <- terra::crs(template) - if ("mcp" %in% method | tolower(method$surface) == 'binary') { - - WindowList <- list() - for (j in 1:length(ver@polygons[[1]]@Polygons)) { - x <- y <- vector() - x <- ver@polygons[[1]]@Polygons[[j]]@coords[, 1] - y <- ver@polygons[[1]]@Polygons[[j]]@coords[, 2] - xy <- paste(x, y, sep = "") - x <- x[!(duplicated(xy))] - y <- y[!(duplicated(xy))] - - MyWindow <- - try(spatstat.geom::owin(poly = list(x = x[length(x):1], y = y[length(x):1])), silent = - TRUE) - if (class(MyWindow) == "try-error") - MyWindow <- - spatstat.geom::owin(poly = list(x = x[1:length(x)], y = y[1:length(x)])) - ifelse(j == 1, WindowList <- MyWindow, { - ifelse( - spatstat.geom::is.subset.owin(MyWindow, WindowList), - WindowList <- spatstat.geom::setminus.owin(WindowList, MyWindow), - WindowList <- spatstat.geom::union.owin(MyWindow, WindowList) - ) - }) - } - - outVect <- terra::vect(sf::st_as_sf(WindowList)) - crs(outVect) <- terra::crs(template) - outVect <- terra::intersect(outVect, mask) - terra::writeVector(outVect, bg_out, overwrite = T) - } + ### rescale weights. this is time consuming for large rasters. + terra::setMinMax(r, force=TRUE); mm=terra::minmax(r) + r <- (r-mm[1])/(mm[2]-mm[1]) * 100 + r <- round(r) + # set all values that are 0 to be NA + r <- ifel(r==0, NA, r) - } # end function - - -# Background point (psuedo-absence) generation function ------------------------- - -# library(data.table) - - -backgroundPointGeneration <- function(sp, # species name - n, # number of pseudoabsence points to generate - method, # c('kde','mcp') - outputDir, # output directory - target_file, # path to csv with growth form-specific points (target background point pool) - overwrite) # overwrite existing files - { # start function - - if('kde' %in% method){ + ### create new template that is cropped to sampling grid + template_small <- terra::crop(template, r) + ### resample to template. this is time consuming for large rasters. + r <- terra::resample(r, template_small, method="bilinear") + ### crop & mask to template + r <- terra::mask(r, template_small) - kde_pts_out <- paste0(outputDir, '/', sp, '_kde_bg_pts.csv') - - if(file.exists(kde_pts_out)){ - stop("KDE background points already exist!\nDelete current points to proceed.") - } + # ### write out raster + # # global(!is.na(r), "sum", na.rm=TRUE) + # terra::writeRaster(x = r, + # filename = kde_bg_out, + # overwrite=TRUE, + # gdal=c('COMPRESS=LZW', 'BIGTIFF=YES', 'TILED=YES')) + ### get cells that are observations + occ_cellIDs <- terra::extract(r, vect(occ_sf), cells = TRUE) + ### IF CONTINOUS, SAMPLE WITH WEIGHTS if(tolower(method$surface) == "continuous"){ - ### Find KDE raster - r <- list.files(path = outputDir, pattern = 'kde.*.tif$', full.names = T) - r <- terra::rast(r) - - valid <- data.table::data.table() - - # you can extract many values, very quickly with terra - inc <- 10000000 - total <- terra::ncell(r) - - # since we're not ever going to put a point on the kde surface where values - # are < 1, surfaces can be subset to the *truly* available values. For - # species with very limited distributions, this dramatically decreases the - # amount of time the code spends searching for valid bg locations. - - if(total > inc){ - - cat('Finding viable cells...\n') - - pb <- txtProgressBar(0, 100, style = 3) - - for(i in seq(1,ncell(r),inc)){ - - tmp <- terra::extract(r, i:(i + inc)) - tmp$id <- c(i:(i + inc)) - tmp$kde_bg_surface <- ifelse(tmp[ ,1] < 1, NA, tmp[ ,1]) - tmp <- na.omit(tmp) - - valid <- rbind(valid, tmp) + pts <- data.table::data.table() + system.time({ + while(nrow(pts) < n){ + ### take spatially random sample all cells in OCC region + rnd_sample <- terra::spatSample( + x=r, + size=n*2, + method='random', + cells=TRUE, + xy=FALSE, + replace=TRUE, + as.raster=FALSE, + as.points=FALSE, + na.rm=TRUE) |> data.table::data.table() + names(rnd_sample)[1] = "cell" + names(rnd_sample)[2] = "weights" - perc_complete <- ((i + inc) / total) * 100 + ### drop cells that are occurrence + rnd_sample[rnd_sample$cell%in%occ_cellIDs$cell, "weights"] <- 0 - setTxtProgressBar(pb, ifelse(perc_complete <= 100, perc_complete, 100)) + ### sample the sample with weights + selected_cells <- sample( + rnd_sample$cell, + size = 1000, + replace = TRUE, + prob = rnd_sample$weights + ) + ### combine sample set with complete set, drop duplicates if any + pts <- rbind(pts, data.table::as.data.table(xyFromCell(r, selected_cells))) + pts <- unique(pts) + pts <- pts[complete.cases(pts), ] } - - close(pb) - - } else { - - tmp <- terra::extract(r, 1:inc) - tmp$id <- 1:inc - tmp$kde_bg_surface <- ifelse(tmp[ ,1] < 1, NA, tmp[ ,1]) - tmp <- na.omit(tmp) - - valid <- rbind(valid, tmp) - + }) + ### reduce pts to size of background points requested. random sample is all that's needed here. not sampling by prob. Just thinning to limit size + if(nrow(pts) > n){ + pts_ind <- sample(nrow(pts), size=n, replace=FALSE) + pts <- pts[pts_ind,] } + ### write out + names(pts) <- c("X", "Y") + pts$Response <- -9998 + data.table::fwrite(pts, kde_pts_out) + # occ_sf <- sf::st_as_sf( + # pts[, c("X", "Y")], + # coords = c("X", "Y"), + # crs = crs(template), + # remove = FALSE + # ) |> terra::vect() + # terra::writeVector(occ_sf, bg_pts_out, overwrite = T) - pts <- data.table::data.table() - used <- data.table::data.table() + } # end tolower(method$surface) == "continuous" + + ### if BINARY, SAMPLE WITH OUT WEIGHTS + if(tolower(method$surface) == 'binary'){ + ### set outnames + # Normalize to create a probability surface + total_sum <- terra::global(r, "sum", na.rm = TRUE)[1,1] + f <- r / total_sum + # Get values and cell numbers for non-NA cells only + v <- terra::values(f, mat = FALSE) + non_na_cells <- which(!is.na(v)) + vals <- v[non_na_cells]; rm(v); gc() + # Get coordinates of non-NA cells using cell numbers + coords <- terra::xyFromCell(f, non_na_cells); rm(non_na_cells); gc() + # Sort values and coords by decreasing probability + ord <- order(vals, decreasing = TRUE) + vals_sorted <- vals[ord]; rm(vals); gc() + # coords_sorted <- coords[ord, ]; rm(coords); gc() + # Cumulative sum to find 95% threshold + cum_prob <- cumsum(vals_sorted) + isopleth <- method$isopleth/100 + idx <- which(cum_prob >= isopleth)[1] + threshold <- vals_sorted[idx]; rm(idx, vals_sorted); gc() + # Create binary mask of cells >= threshold + outRast <- terra::ifel(r >= threshold, 1, NA) + ### mask to template to get holes + outRast <- mask(outRast, template_small) + # ### write out raster + # terra::writeRaster(x <- outRast, + # filename = kde_bg_out, + # overwrite=TRUE, + # gdal=c('COMPRESS=LZW', 'BIGTIFF=YES', 'TILED=YES')) + # plot(template) + # plot(outVect, add=TRUE) + # plot(pts, add=TRUE) + + ### get CELL IDs of observations + occ_cellIDs <- terra::extract(outRast, vect(occ_sf), cells = TRUE) + + ### RANDOMLY SAMPLE RASTER WITH NO WEIGHTS + ### then remove occ locations & dups and sample again. + pts <- data.table::data.table() while(nrow(pts) < n){ + ### take spatially random sample of entire study area + system.time({ + rnd_sample <- terra::spatSample( + outRast, + size=n*2, + method='random', + cells=TRUE, + xy=TRUE, + replace=FALSE, + as.raster=FALSE, + as.points=FALSE, + na.rm=TRUE) |> data.table::data.table() + names(rnd_sample)[1] = "cell" + names(rnd_sample)[2] = "X" + names(rnd_sample)[3] = "Y" + }) # - tmp <- valid[sample(nrow(valid), 1000),] - tmp <- tmp[!(tmp$id %in% used$id), ] - - if(nrow(tmp) == 0) next() - - used <- rbind(used, tmp) + ### drop cells that are occurrence + rnd_sample <- rnd_sample[!rnd_sample$cell%in%occ_cellIDs$cell, ] - # generate column of random integers between 1 and 100. - # If the id field is >= this number, it is retained. - tmp[, test := as.numeric(sample(1:100, nrow(tmp), replace = T))] - - # keep true values only - tmp <- tmp[, retain := ifelse(tmp[,1] >= test, T, F)][retain == T] - - if(nrow(tmp) > 0){ - pts <- rbind(pts, tmp) - } else { - next() - } + ### combine sample set with complete set, drop duplicates if any + pts <- rbind(pts, rnd_sample) + pts <- unique(pts) + pts <- pts[complete.cases(pts), ] } - + ### reduce pts to size of background points requested. random sample is all that's needed here. not sampling by prob. Just thinning to limit size if(nrow(pts) > n){ - pts <- pts[sample(1:nrow(pts), n, prob = pts[[1]], replace = F), ] + pts_ind <- sample(nrow(pts), size=n, replace=FALSE) + pts <- pts[pts_ind,] } - - # convert to spatVector and write out - v <- as.data.frame(terra::xyFromCell(r, pts[[2]])) - v$Response <- c(rep(backgroundValue, n)) - colnames(v) <- c("X", "Y", "Response") - write.csv(v, kde_pts_out, row.names = F) - - cat(nrow(v), 'pseudoabsence points generated for continuous KDE surface\n') + ### write out + pts$Response <- -9998 + data.table::fwrite(pts, kde_pts_out) + # occ_sf <- sf::st_as_sf( + # pts[, c("X", "Y")], + # coords = c("X", "Y"), + # crs = crs(template), + # remove = FALSE + # ) |> terra::vect() + # terra::writeVector(occ_sf, bg_pts_out, overwrite = T) + } # end tolower(method$surface) == 'binary' + } # end 'kde' %in% method + + if("mcp" %in% method){ + ### set outnames + mcp_pts_out <- paste0(outputDir, '/', sp, '_mcp_bg_pts.csv') + bg_pts_out <- paste0(outputDir, '/', sp, '_mcp_bg_pts.shp') + ### don't waste time generating points if they exist + if(file.exists(mcp_pts_out)){ + stop("MCP background points already exist!\nDelete current points to proceed.") } - if(tolower(method$surface) == "binary"){ - # v <- sf::st_read(list.files(path = outputDir, pattern = 'kde.*shp', full.names = T), quiet = TRUE) - # - # if(missing(target_file)) stop('Missing path to target bg dataset!') - # pts <- readr::read_csv(target_file, show_col_types = F, progress = F) |> - # dplyr::select(1:2) |> - # sf::st_as_sf(coords = c('X','Y'), crs = sf::st_crs(v)) - # - # test <- sf::st_intersects(v, pts, sparse = T) - # pts <- pts[unlist(test),] |> unique() - # - # if(nrow(pts) > n){ - # pts <- pts[sample(nrow(pts), n), ] - # } - # - # pts <- as.data.frame(sf::st_coordinates(pts)) - - v <- vect(list.files(path = outputDir, pattern = 'kde.*shp', full.names = T)) - pts <- crds(spatSample(v, n, "random")) - - pts <- as.data.frame(pts) - names(pts) <- c("X", "Y") - pts$Response <- backgroundValue - write.csv(pts, kde_pts_out, row.names = F) - - cat(nrow(pts), 'pseudoabsence points generated for binary KDE surface\n') + # get X,Y coords + xy <- fieldDataSheet[,c("X", "Y")] + n_vals <- nrow(xy) + if (n_vals < 3L){ + stop("Need at least 3 points for a polygon.") } - } - - if('mcp' %in% method){ - mcp_pts_out <- paste0(outputDir, '/', sp, '_mcp_bg_pts.csv') + # center points to calc isopleth + cx <- mean(xy[, 1]) + cy <- mean(xy[, 2]) + + # Squared distances (no sqrt: avoids extra cost) + d2 <- (xy[, 1] - cx)^2 + (xy[, 2] - cy)^2 + rm(cx, cy) + + # threshold with quantile from isopleth + thr <- stats::quantile(d2, probs = method$isopleth / 100, names = FALSE, type = 7) + keep <- d2 <= thr + rm(d2, thr) + + if (sum(keep) < 3L) stop("Fewer than 3 points remain after trimming.") + + # Convex hull indices on trimmed set + id_trim <- which(keep) + hull_local <- grDevices::chull(xy[keep, 1], xy[keep, 2]) + hull_idx <- id_trim[hull_local] + + # Close polygon ring + poly_coords <- rbind(xy[hull_idx, , drop = FALSE], xy[hull_idx[1], , drop = FALSE]) + + # convert to polygon + polygons <- poly_coords |> + sf::st_as_sf(coords = c("X", "Y"), crs = crs(template)) |> + summarize(geometry = sf::st_combine(geometry)) |> + sf::st_cast("POLYGON") + + ### set path for bg surface + bg_out <- paste0(outputDir, '/', sp, '_mcp_bg_surface.tif') - v <- vect(list.files(path = outputDir, pattern = 'mcp.*shp', full.names = T)) - pts <- crds(spatSample(v, n, "random")) + ### convert to vect & set crs + outVect <- terra::vect(polygons); rm(polygons) + crs(outVect) <- terra::crs(template) - pts <- as.data.frame(pts) - names(pts) <- c("X", "Y") - pts$Response <- backgroundValue - write.csv(pts, mcp_pts_out, row.names = F) + ### create new template that is cropped to sampling grid + template_small <- terra::crop(template, outVect) - cat(nrow(pts), 'pseudoabsence points generated for MCP surface\n') - } + ### convert to raster + outRast <- rasterize(outVect, template_small) + + ### mask to template to get holes + outRast <- mask(outRast, template_small) + + # ### write out raster + # terra::writeRaster(x <- outRast, + # filename = bg_out, + # overwrite=TRUE, + # gdal=c('COMPRESS=LZW', 'BIGTIFF=YES', 'TILED=YES')) + + # plot(template) + # plot(outVect, add=TRUE) + # plot(pts, add=TRUE) + + ### get CELL IDs of observations + occ_cellIDs <- terra::extract(outRast, vect(occ_sf), cells = TRUE) + + ### RANDOMLY SAMPLE RASTER WITH NO WEIGHTS + ### then remove occ locations & dups and sample again. + pts <- data.table::data.table() + while(nrow(pts) < n){ + ### take spatially random sample of entire study area + system.time({ + rnd_sample <- terra::spatSample( + outRast, + size=n*2, + method='random', + cells=TRUE, + xy=TRUE, + replace=FALSE, + as.raster=FALSE, + as.points=FALSE, + na.rm=TRUE) |> data.table::data.table() + names(rnd_sample)[1] = "cell" + names(rnd_sample)[2] = "X" + names(rnd_sample)[3] = "Y" + }) # + + ### drop out cells that are occurrence + rnd_sample <- rnd_sample[!rnd_sample$cell%in%occ_cellIDs$cell, ] + + ### combine sample set with complete set, drop duplicates if any + pts <- rbind(pts, rnd_sample) + pts <- unique(pts) + pts <- pts[complete.cases(pts), ] + } + ### reduce pts to size of background points requested. random sample is all that's needed here. not sampling by prob. Just thinning to limit size + if(nrow(pts) > n){ + pts_ind <- sample(nrow(pts), size=n, replace=FALSE) + pts <- pts[pts_ind,] + } + ### write out + pts$Response <- -9998 + data.table::fwrite(pts, mcp_pts_out) + # occ_sf <- sf::st_as_sf( + # pts[, c("X", "Y")], + # coords = c("X", "Y"), + # crs = crs(template), + # remove = FALSE + # ) |> terra::vect() + # terra::writeVector(occ_sf, bg_pts_out, overwrite = T) + } # end "mcp" %in% method -} # End Function \ No newline at end of file +} # end backgroundSurfacePointGeneration + From 2b5316d62e82696e8bc8fdff90ab205efae05b1b Mon Sep 17 00:00:00 2001 From: Rich Inman Date: Mon, 13 Apr 2026 11:11:14 -0600 Subject: [PATCH 10/18] revised background generation script: call revised function --- src/4-background-data-generation.R | 233 ++++++++++------------------- 1 file changed, 77 insertions(+), 156 deletions(-) diff --git a/src/4-background-data-generation.R b/src/4-background-data-generation.R index dff53cc..d85ceb8 100644 --- a/src/4-background-data-generation.R +++ b/src/4-background-data-generation.R @@ -84,7 +84,7 @@ if (backgroundDataOptionsSheet$GenerateBackgroundSites) { if (is.na(backgroundDataOptionsSheet$KDESurface)) { if ( backgroundDataOptionsSheet$BackgroundGenerationMethod == - "Kernel Density Estimate (KDE)" + "Kernel Density Estimate (KDE)" ) { backgroundDataOptionsSheet$KDESurface <- "Continuous" } @@ -92,8 +92,8 @@ if (backgroundDataOptionsSheet$GenerateBackgroundSites) { if (is.na(backgroundDataOptionsSheet$Isopleth)) { if ( backgroundDataOptionsSheet$KDESurface == "Binary" | - backgroundDataOptionsSheet$BackgroundGenerationMethod == - "Minimum Convex Polygon (MCP)" + backgroundDataOptionsSheet$BackgroundGenerationMethod == + "Minimum Convex Polygon (MCP)" ) { backgroundDataOptionsSheet$Isopleth <- 95 } @@ -130,7 +130,7 @@ if (backgroundDataOptionsSheet$GenerateBackgroundSites) { if ( backgroundDataOptionsSheet$GenerateBackgroundSites && - any(fieldDataSheet$Response == backgroundValue) + any(fieldDataSheet$Response == backgroundValue) ) { updateRunLog(paste0( "\nWarning: ", @@ -145,7 +145,7 @@ if ( if (backgroundDataOptionsSheet$GenerateBackgroundSites) { if ( backgroundDataOptionsSheet$BackgroundGenerationMethod == - "Kernel Density Estimate (KDE)" + "Kernel Density Estimate (KDE)" ) { methodInputs <- list( "method" = "kde", @@ -155,7 +155,7 @@ if (backgroundDataOptionsSheet$GenerateBackgroundSites) { } if ( backgroundDataOptionsSheet$BackgroundGenerationMethod == - "Minimum Convex Polygon (MCP)" + "Minimum Convex Polygon (MCP)" ) { methodInputs <- list( "method" = "mcp", @@ -163,181 +163,102 @@ if (backgroundDataOptionsSheet$GenerateBackgroundSites) { "isopleth" = backgroundDataOptionsSheet$Isopleth ) } - + + ### load template raster templateRaster <- rast(templateSheet$RasterFilePath) - - # create mask polygon from extent of valid data in template - templateVector <- as.polygons(templateRaster) - - # generate background surface - backgroundSurfaceGeneration( + + backgroundSurfacePointGeneration( sp = "species", + n=backgroundDataOptionsSheet$BackgroundSiteCount+100, template = templateRaster, - method = methodInputs, - mask = templateVector, outputDir = ssimTempDir, - dat = fieldDataSheet - ) - + dat = fieldDataSheet, + method = methodInputs) + progressBar() - rm(templateVector) gc() - - # generate background (psuedo-absence) points - backgroundPointGeneration( - sp = "species", - outputDir = ssimTempDir, - n = backgroundDataOptionsSheet$BackgroundSiteCount + 100, - method = methodInputs, - # target_file = backgroundDataOptionsSheet, # this is an external input... - overwrite = T - ) - - progressBar() + # add background point to field data - bgData <- read.csv(file.path( + bgData <- data.table::fread(file.path( ssimTempDir, paste0("species_", methodInputs$method, "_bg_pts.csv") )) + + ### stop if no background pts were returned + if (nrow(bgData) == 0) { + updateRunLog( + "\nWarning: No background sites remained after filtering against presence pixels. No background data was added to Field Data.\n" + ) + stop("Warning: No background sites remained after filtering against presence pixels. No background data was added to Field Data.") + } + + progressBar() + + ### create SiteIDs for background data startId <- max(fieldDataSheet$SiteID, siteDataSheet$SiteID) + 1 bgData$SiteID <- startId:(startId + nrow(bgData) - 1) + + ### create spatvect object for extract + vbgPts <- vect(bgData, geom = c("X", "Y"), crs = crs(templateRaster)) + + ### add columns for combining with field data. bgData$UseInModelEvaluation <- NA bgData$ModelSelectionSplit <- NA bgData$Weight <- NA - fieldData <- rbind(fieldDataSheet, bgData) - - ## Remove background points that occur in pixels with presence points ----- + ### update field data weights. This is a legacy item, and needs to be revisted. Currently set the weights for ALL background data to 1 if any weights were supplied with field data. User options for setting other weights. + if (any(!is.na(fieldDataSheet$Weight))) { + bgData$Weight <- 1 + } - # rasterize points data - r <- rast( - ext(templateRaster), - resolution = res(templateRaster), - crs = crs(templateRaster) + ### subset cols + sel_cols = names(fieldDataSheet) + bgData = bgData[,..sel_cols]; rm(sel_cols) + fieldDataSheet <- rbind(fieldDataSheet, bgData) + fieldDataSheet$SiteID <- format(fieldDataSheet$SiteID, scientific = F) + + ## Extract covariate data for background sites ----- + ### loop through each + for (i in 1:nrow(covariateDataSheet)) { + ri = terra::rast(covariateDataSheet$RasterFilePath[i]) + vals = terra::extract( + ri, vbgPts, + method="simple", + cells=FALSE, + xy=FALSE, + ID=TRUE) + rm(ri); gc() + bgData[,covariateDataSheet$CovariatesID[i]:=vals[,2]] + # progressBar() + } + bgData[, SiteID := as.character(SiteID)] + + CovariatesID = covariateDataSheet$CovariatesID[!covariateDataSheet$CovariatesID %in% "mtpi_10m"] + + ### trim cols to format and melt to long format + keep_cols = c("SiteID", covariateDataSheet$CovariatesID) + bgData = bgData[,..keep_cols] + bgSiteData <- gather( + data = bgData, + key = CovariatesID, + value = Value, + -SiteID ) - pts <- vect(fieldData, geom = c("X", "Y"), crs = crs(templateRaster)) - rm(templateRaster, fieldData) - gc() - rIDs <- rast(r, vals = 1:(dim(r)[1] * dim(r)[2])) - cellPerPt <- terra::extract(rIDs, pts) - cellPerPt$SiteID <- pts$SiteID - names(cellPerPt)[2] <- "PixelID" - pts <- merge(pts, cellPerPt[, c("PixelID", "SiteID")]) - rm(rIDs, cellPerPt) + siteDataSheet <- rbind(siteDataSheet, bgSiteData) + siteDataSheet$SiteID <- format(siteDataSheet$SiteID, scientific = F) + siteDataSheet <- siteDataSheet %>% + distinct(SiteID, CovariatesID, .keep_all = T) + rm(bgSiteData) gc() - # check for duplicate pixel ids - if (any(duplicated(pts$PixelID))) { - dups <- pts[which(duplicated(pts$PixelID)), ] - dropSites <- dups$SiteID[which(dups$Response == backgroundValue)] - pts <- pts[!pts$SiteID %in% dropSites, ] - rm(dups, dropSites) - } - - bgPts <- pts[pts$Response == backgroundValue] - bgPts$PixelID <- NULL - rm(pts) + # save site data to scenario + saveDatasheet(myScenario, siteDataSheet, "wisdm_SiteData") + rm(siteDataSheet) gc() - # remove extra bg sites - if (nrow(bgPts) > backgroundDataOptionsSheet$BackgroundSiteCount) { - bgPts <- sample( - x = bgPts, - size = backgroundDataOptionsSheet$BackgroundSiteCount, - replace = F - ) - } - if (nrow(bgPts) < backgroundDataOptionsSheet$BackgroundSiteCount) { - updateRunLog(paste0( - nrow(bgPts), - " psuedoabsence sites added to Field Data when ", - backgroundDataOptionsSheet$BackgroundSiteCount, - " were requested." - )) - # backgroundDataOptionsSheet$BackgroundSiteCount <- nrow(bgPts) - } - - progressBar() - - if (nrow(bgPts) == 0) { - updateRunLog( - "\nWarning: No background sites remained after filtering against presence pixels. No background data was added to Field Data.\n" - ) - } else { - ## Extract covariate data for background sites ----- - - # rasterize bg data - rastPts <- rasterize(bgPts, r) - matPts <- as.matrix(rastPts, wide = T) - keep <- which(!is.na(matPts)) - rm(rastPts) - gc() - - rIDs <- rast(r, vals = 1:(dim(r)[1] * dim(r)[2])) - cellPerPt <- terra::extract(rIDs, bgPts) - cellPerPt$SiteID <- bgPts$SiteID - names(cellPerPt)[2] <- "PixelID" - bgPts <- merge(bgPts, cellPerPt[, c("PixelID", "SiteID")]) - rm(rIDs) - gc() - - rPixels <- rasterize(bgPts, r, field = "PixelID") - matPixs <- as.matrix(rPixels, wide = T) - PixelIDs <- matPixs[keep] - PixelData <- data.frame(PixelID = PixelIDs) - rm(rPixels, matPixs, PixelIDs) - gc() - - for (i in 1:nrow(covariateDataSheet)) { - ri <- rast(covariateDataSheet$RasterFilePath[i]) - mi <- as.matrix(ri, wide = TRUE) - - outMat <- mi * matPts - vals <- outMat[keep] - rm(ri, mi, outMat) - gc() - - PixelData[covariateDataSheet$CovariatesID[i]] <- vals - progressBar() - } - - # convert background site data to long format and add to existing site datasheet - bgSiteData <- merge(cellPerPt[, c("PixelID", "SiteID")], PixelData) - bgSiteData$PixelID <- NULL - bgSiteData <- gather( - data = bgSiteData, - key = CovariatesID, - value = Value, - -SiteID - ) - - siteDataSheet <- rbind(siteDataSheet, bgSiteData) - siteDataSheet$SiteID <- format(siteDataSheet$SiteID, scientific = F) - siteDataSheet <- siteDataSheet %>% - distinct(SiteID, CovariatesID, .keep_all = T) - rm(bgSiteData) - gc() - - # save site data to scenario - saveDatasheet(myScenario, siteDataSheet, "wisdm_SiteData") - rm(siteDataSheet) - gc() - - # update field data - bgData <- bgData[which(bgData$SiteID %in% bgPts$SiteID), ] - if (any(!is.na(fieldDataSheet$Weight))) { - bgData$Weight <- 1 - } - - fieldDataSheet <- rbind(fieldDataSheet, bgData) - fieldDataSheet$SiteID <- format(fieldDataSheet$SiteID, scientific = F) - rm(bgData, bgPts) - gc() - } # end bgPts check - # save updated field data to scenario saveDatasheet(myScenario, fieldDataSheet, "wisdm_FieldData", append = F) } From 1a9910487bbf3a85880f599494c7893dd05d2bf9 Mon Sep 17 00:00:00 2001 From: Rich Inman Date: Mon, 13 Apr 2026 11:12:09 -0600 Subject: [PATCH 11/18] updating .yml to add spatstat.explore and main spatstat libraries --- src/wisdm-conda.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/wisdm-conda.yml b/src/wisdm-conda.yml index 30c6956..8a53e61 100644 --- a/src/wisdm-conda.yml +++ b/src/wisdm-conda.yml @@ -22,7 +22,9 @@ dependencies: - r-sf=1.0_7 - r-shiny=1.7.4 - r-sp=2.1_4 + - r-spatstat=3.0_7 - r-spatstat.geom=3.2_9 + - r-spatstat.explore=3.2_6 - r-terra=1.5_21 - r-tidyr=1.3.1 - r-tidyverse=2.0.0 From 0bd38220b1ff279ebdfeb1ddb66acc4daea7da30 Mon Sep 17 00:00:00 2001 From: spearmangillman Date: Thu, 16 Apr 2026 10:52:20 -0300 Subject: [PATCH 12/18] Revert "updating .yml to add spatstat.explore and main spatstat libraries" This reverts commit 1a9910487bbf3a85880f599494c7893dd05d2bf9. --- src/wisdm-conda.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/wisdm-conda.yml b/src/wisdm-conda.yml index 8a53e61..30c6956 100644 --- a/src/wisdm-conda.yml +++ b/src/wisdm-conda.yml @@ -22,9 +22,7 @@ dependencies: - r-sf=1.0_7 - r-shiny=1.7.4 - r-sp=2.1_4 - - r-spatstat=3.0_7 - r-spatstat.geom=3.2_9 - - r-spatstat.explore=3.2_6 - r-terra=1.5_21 - r-tidyr=1.3.1 - r-tidyverse=2.0.0 From 74d467a95b0a736783abb4351bc6b9fdd0700763 Mon Sep 17 00:00:00 2001 From: spearmangillman Date: Thu, 16 Apr 2026 10:52:42 -0300 Subject: [PATCH 13/18] Revert "revised background generation script: call revised function" This reverts commit 2b5316d62e82696e8bc8fdff90ab205efae05b1b. --- src/4-background-data-generation.R | 233 +++++++++++++++++++---------- 1 file changed, 156 insertions(+), 77 deletions(-) diff --git a/src/4-background-data-generation.R b/src/4-background-data-generation.R index d85ceb8..dff53cc 100644 --- a/src/4-background-data-generation.R +++ b/src/4-background-data-generation.R @@ -84,7 +84,7 @@ if (backgroundDataOptionsSheet$GenerateBackgroundSites) { if (is.na(backgroundDataOptionsSheet$KDESurface)) { if ( backgroundDataOptionsSheet$BackgroundGenerationMethod == - "Kernel Density Estimate (KDE)" + "Kernel Density Estimate (KDE)" ) { backgroundDataOptionsSheet$KDESurface <- "Continuous" } @@ -92,8 +92,8 @@ if (backgroundDataOptionsSheet$GenerateBackgroundSites) { if (is.na(backgroundDataOptionsSheet$Isopleth)) { if ( backgroundDataOptionsSheet$KDESurface == "Binary" | - backgroundDataOptionsSheet$BackgroundGenerationMethod == - "Minimum Convex Polygon (MCP)" + backgroundDataOptionsSheet$BackgroundGenerationMethod == + "Minimum Convex Polygon (MCP)" ) { backgroundDataOptionsSheet$Isopleth <- 95 } @@ -130,7 +130,7 @@ if (backgroundDataOptionsSheet$GenerateBackgroundSites) { if ( backgroundDataOptionsSheet$GenerateBackgroundSites && - any(fieldDataSheet$Response == backgroundValue) + any(fieldDataSheet$Response == backgroundValue) ) { updateRunLog(paste0( "\nWarning: ", @@ -145,7 +145,7 @@ if ( if (backgroundDataOptionsSheet$GenerateBackgroundSites) { if ( backgroundDataOptionsSheet$BackgroundGenerationMethod == - "Kernel Density Estimate (KDE)" + "Kernel Density Estimate (KDE)" ) { methodInputs <- list( "method" = "kde", @@ -155,7 +155,7 @@ if (backgroundDataOptionsSheet$GenerateBackgroundSites) { } if ( backgroundDataOptionsSheet$BackgroundGenerationMethod == - "Minimum Convex Polygon (MCP)" + "Minimum Convex Polygon (MCP)" ) { methodInputs <- list( "method" = "mcp", @@ -163,102 +163,181 @@ if (backgroundDataOptionsSheet$GenerateBackgroundSites) { "isopleth" = backgroundDataOptionsSheet$Isopleth ) } - - ### load template raster + templateRaster <- rast(templateSheet$RasterFilePath) - - backgroundSurfacePointGeneration( + + # create mask polygon from extent of valid data in template + templateVector <- as.polygons(templateRaster) + + # generate background surface + backgroundSurfaceGeneration( sp = "species", - n=backgroundDataOptionsSheet$BackgroundSiteCount+100, template = templateRaster, + method = methodInputs, + mask = templateVector, outputDir = ssimTempDir, - dat = fieldDataSheet, - method = methodInputs) - + dat = fieldDataSheet + ) + progressBar() + rm(templateVector) gc() - + + # generate background (psuedo-absence) points + backgroundPointGeneration( + sp = "species", + outputDir = ssimTempDir, + n = backgroundDataOptionsSheet$BackgroundSiteCount + 100, + method = methodInputs, + # target_file = backgroundDataOptionsSheet, # this is an external input... + overwrite = T + ) + + progressBar() # add background point to field data - bgData <- data.table::fread(file.path( + bgData <- read.csv(file.path( ssimTempDir, paste0("species_", methodInputs$method, "_bg_pts.csv") )) - - ### stop if no background pts were returned - if (nrow(bgData) == 0) { - updateRunLog( - "\nWarning: No background sites remained after filtering against presence pixels. No background data was added to Field Data.\n" - ) - stop("Warning: No background sites remained after filtering against presence pixels. No background data was added to Field Data.") - } - - progressBar() - - ### create SiteIDs for background data startId <- max(fieldDataSheet$SiteID, siteDataSheet$SiteID) + 1 bgData$SiteID <- startId:(startId + nrow(bgData) - 1) - - ### create spatvect object for extract - vbgPts <- vect(bgData, geom = c("X", "Y"), crs = crs(templateRaster)) - - ### add columns for combining with field data. bgData$UseInModelEvaluation <- NA bgData$ModelSelectionSplit <- NA bgData$Weight <- NA - ### update field data weights. This is a legacy item, and needs to be revisted. Currently set the weights for ALL background data to 1 if any weights were supplied with field data. User options for setting other weights. - if (any(!is.na(fieldDataSheet$Weight))) { - bgData$Weight <- 1 - } + fieldData <- rbind(fieldDataSheet, bgData) - ### subset cols - sel_cols = names(fieldDataSheet) - bgData = bgData[,..sel_cols]; rm(sel_cols) - fieldDataSheet <- rbind(fieldDataSheet, bgData) - fieldDataSheet$SiteID <- format(fieldDataSheet$SiteID, scientific = F) - - ## Extract covariate data for background sites ----- - ### loop through each - for (i in 1:nrow(covariateDataSheet)) { - ri = terra::rast(covariateDataSheet$RasterFilePath[i]) - vals = terra::extract( - ri, vbgPts, - method="simple", - cells=FALSE, - xy=FALSE, - ID=TRUE) - rm(ri); gc() - bgData[,covariateDataSheet$CovariatesID[i]:=vals[,2]] - # progressBar() - } - bgData[, SiteID := as.character(SiteID)] - - CovariatesID = covariateDataSheet$CovariatesID[!covariateDataSheet$CovariatesID %in% "mtpi_10m"] - - ### trim cols to format and melt to long format - keep_cols = c("SiteID", covariateDataSheet$CovariatesID) - bgData = bgData[,..keep_cols] - bgSiteData <- gather( - data = bgData, - key = CovariatesID, - value = Value, - -SiteID + ## Remove background points that occur in pixels with presence points ----- + + # rasterize points data + r <- rast( + ext(templateRaster), + resolution = res(templateRaster), + crs = crs(templateRaster) ) + pts <- vect(fieldData, geom = c("X", "Y"), crs = crs(templateRaster)) + rm(templateRaster, fieldData) + gc() - siteDataSheet <- rbind(siteDataSheet, bgSiteData) - siteDataSheet$SiteID <- format(siteDataSheet$SiteID, scientific = F) - siteDataSheet <- siteDataSheet %>% - distinct(SiteID, CovariatesID, .keep_all = T) - rm(bgSiteData) + rIDs <- rast(r, vals = 1:(dim(r)[1] * dim(r)[2])) + cellPerPt <- terra::extract(rIDs, pts) + cellPerPt$SiteID <- pts$SiteID + names(cellPerPt)[2] <- "PixelID" + pts <- merge(pts, cellPerPt[, c("PixelID", "SiteID")]) + rm(rIDs, cellPerPt) gc() - # save site data to scenario - saveDatasheet(myScenario, siteDataSheet, "wisdm_SiteData") - rm(siteDataSheet) + # check for duplicate pixel ids + if (any(duplicated(pts$PixelID))) { + dups <- pts[which(duplicated(pts$PixelID)), ] + dropSites <- dups$SiteID[which(dups$Response == backgroundValue)] + pts <- pts[!pts$SiteID %in% dropSites, ] + rm(dups, dropSites) + } + + bgPts <- pts[pts$Response == backgroundValue] + bgPts$PixelID <- NULL + rm(pts) gc() + # remove extra bg sites + if (nrow(bgPts) > backgroundDataOptionsSheet$BackgroundSiteCount) { + bgPts <- sample( + x = bgPts, + size = backgroundDataOptionsSheet$BackgroundSiteCount, + replace = F + ) + } + if (nrow(bgPts) < backgroundDataOptionsSheet$BackgroundSiteCount) { + updateRunLog(paste0( + nrow(bgPts), + " psuedoabsence sites added to Field Data when ", + backgroundDataOptionsSheet$BackgroundSiteCount, + " were requested." + )) + # backgroundDataOptionsSheet$BackgroundSiteCount <- nrow(bgPts) + } + + progressBar() + + if (nrow(bgPts) == 0) { + updateRunLog( + "\nWarning: No background sites remained after filtering against presence pixels. No background data was added to Field Data.\n" + ) + } else { + ## Extract covariate data for background sites ----- + + # rasterize bg data + rastPts <- rasterize(bgPts, r) + matPts <- as.matrix(rastPts, wide = T) + keep <- which(!is.na(matPts)) + rm(rastPts) + gc() + + rIDs <- rast(r, vals = 1:(dim(r)[1] * dim(r)[2])) + cellPerPt <- terra::extract(rIDs, bgPts) + cellPerPt$SiteID <- bgPts$SiteID + names(cellPerPt)[2] <- "PixelID" + bgPts <- merge(bgPts, cellPerPt[, c("PixelID", "SiteID")]) + rm(rIDs) + gc() + + rPixels <- rasterize(bgPts, r, field = "PixelID") + matPixs <- as.matrix(rPixels, wide = T) + PixelIDs <- matPixs[keep] + PixelData <- data.frame(PixelID = PixelIDs) + rm(rPixels, matPixs, PixelIDs) + gc() + + for (i in 1:nrow(covariateDataSheet)) { + ri <- rast(covariateDataSheet$RasterFilePath[i]) + mi <- as.matrix(ri, wide = TRUE) + + outMat <- mi * matPts + vals <- outMat[keep] + rm(ri, mi, outMat) + gc() + + PixelData[covariateDataSheet$CovariatesID[i]] <- vals + progressBar() + } + + # convert background site data to long format and add to existing site datasheet + bgSiteData <- merge(cellPerPt[, c("PixelID", "SiteID")], PixelData) + bgSiteData$PixelID <- NULL + bgSiteData <- gather( + data = bgSiteData, + key = CovariatesID, + value = Value, + -SiteID + ) + + siteDataSheet <- rbind(siteDataSheet, bgSiteData) + siteDataSheet$SiteID <- format(siteDataSheet$SiteID, scientific = F) + siteDataSheet <- siteDataSheet %>% + distinct(SiteID, CovariatesID, .keep_all = T) + rm(bgSiteData) + gc() + + # save site data to scenario + saveDatasheet(myScenario, siteDataSheet, "wisdm_SiteData") + rm(siteDataSheet) + gc() + + # update field data + bgData <- bgData[which(bgData$SiteID %in% bgPts$SiteID), ] + if (any(!is.na(fieldDataSheet$Weight))) { + bgData$Weight <- 1 + } + + fieldDataSheet <- rbind(fieldDataSheet, bgData) + fieldDataSheet$SiteID <- format(fieldDataSheet$SiteID, scientific = F) + rm(bgData, bgPts) + gc() + } # end bgPts check + # save updated field data to scenario saveDatasheet(myScenario, fieldDataSheet, "wisdm_FieldData", append = F) } From 1404b1348220c0a72ec10fcd36202cce127d4ca2 Mon Sep 17 00:00:00 2001 From: spearmangillman Date: Thu, 16 Apr 2026 10:53:05 -0300 Subject: [PATCH 14/18] Revert "revised background generation function: improved speed and not use adehabitatHR library" This reverts commit 4e7f0b78ce4faa636e02f943a363b3db01235e3e. --- src/04-background-data-functions.R | 557 +++++++++++------------------ 1 file changed, 211 insertions(+), 346 deletions(-) diff --git a/src/04-background-data-functions.R b/src/04-background-data-functions.R index a79294f..1e296ef 100644 --- a/src/04-background-data-functions.R +++ b/src/04-background-data-functions.R @@ -4,377 +4,242 @@ ## ------------------------- # Background surface generation function --------------------------------------- -backgroundSurfacePointGeneration <- function( - sp, # species - n, # number of pseudoabsence points to generate - template, # template raster - outputDir, # output directory - dat, # field data - method) # includes method:('kde', 'mcp'); surface: ('continuous', 'binary'); isopleth -{ # start function - # TWO methods: - # KDE or MCP - # TWO options for KDE: - # continuous or binary (random within isopleth) - # ONE option for MCP: - # binary (random within isopleth) - # if KDE & continous: generate pts based on KDE surface only - # if KDE & binary: generate points randomly within isopleth given - # if MCP: generate points randomly within MCP, MCP is generaged by isopleth given - - ### ### ### ### - ### objects that all methods need - ### ### ### ### - # terra::terraOptions(tempdir=outputDir) - - - ### ### ### ### - ### objects that all methods need - ### ### ### ### - ### sf vector object of observations. will need to set CRS if obs are different - occ_sf <- sf::st_as_sf( - dat[dat$Response==1, c("X", "Y")], - coords = c("X", "Y"), - crs = crs(template), - remove = FALSE - ) - - if ('kde' %in% method) { - ### set outnames - kde_bg_out = gsub('/', '\\\\', paste0(outputDir, '/', sp, '_kde_bg_surface.tif')) - kde_pts_out = paste0(outputDir, '/', sp, '_kde_bg_pts.csv') - bg_pts_out = paste0(outputDir, '/', sp, '_kde_bg_pts.shp') - ### don't waste time generating points if they exist - if(file.exists(kde_pts_out)){ - stop("KDE background points already exist!\nDelete current points to proceed.") + +# library(sp) +# library(adehabitatHR) +# library(spatstat.geom) +# library(sf) + +backgroundSurfaceGeneration <- function(sp, # species + template, # template raster + mask, # spatVector object bounding the study area + outputDir, # output directory + dat, # field data + method) # c('kde', 'mcp') + { # start function + + xy <- sp::SpatialPoints(dat[, c('X', 'Y')]) + ud <- adehabitatHR::kernelUD(xy, extent = 0.5, grid = 150) + mm <- ud@coords[order(ud@coords[, 1]), ] + mm <- order(ud@coords[, 2]) + m <- ud@data$ud[mm] + + if ('kde' %in% method) { + if(tolower(method$surface) == "continuous"){ + kde_bg_out <- gsub('/', '\\\\', paste0(outputDir, '/', sp, '_kde_bg_surface.tif')) + kde.mat <- matrix(m, nrow = length(unique(ud@coords[, 1]))) + + t <- terra::rast(raster::raster(ud)) + ext(t) <- ext(ud) + crs(t) <- crs(template) + t <- t / max(kde.mat) * 100 # normalize from 0-100 + t <- terra::crop(t, ext(template)) + t <- terra::resample(x = t, y = template, method = 'near') + t <- terra::ifel(!is.na(template), t, NA) + + writeRaster(x = t, + filename = kde_bg_out, + gdal = c('BIGTIFF=YES', 'TILED=YES', 'COMPRESS=LZW'), + overwrite = T ) + } + if(tolower(method$surface) == 'binary'){ + ver <- adehabitatHR::getverticeshr(ud, method$isopleth) + bg_out <- paste0(outputDir, '/', sp, '_kde_bg_surface.shp') + } } - ### get cell resolution of template - template_res = terra::res(template)[1] - ### get bounding box of observations and buffer by 10 grid cells - bbox <- sf::st_bbox(c( - xmin = min(dat[,"X"]), - ymin = min(dat[,"Y"]), - xmax = max(dat[,"X"]), - ymax = max(dat[,"Y"])), - crs = crs(template)) - ### get cell width to use for kernel density. cell width determined here is approximately what's needed to create: - # 2,500 (50 x 50) - # 10,000 (100 x 100) - # 1,000,000 (1000 x 1000) - # tiles in bounding box of observations for generating the kernel density estimate. This keeps resolution reasonable, but still fast. - cell_width <- mean(c( - ((bbox$xmax - bbox$xmin) / 50), - ((bbox$ymax - bbox$ymin) / 50) - )) - ### expand bounding box by 1/2 width and length - buffer_width <- mean(c( - ((bbox$xmax - bbox$xmin) / 2), - ((bbox$ymax - bbox$ymin) / 2) - )) - bbox <- bbox |> - sf::st_as_sfc() |> - sf::st_buffer(dist = buffer_width)|> - sf::st_bbox() - ### create owin - win <- spatstat.geom::as.owin(bbox) - ### set window - OBSppp <- spatstat.geom::ppp(dat[,"X"], dat[,"Y"], window=win) - # plot(OBSppp) - ### set bandwidth for KDE. This should be a tunable parameter. sigma, estimated here, is ~ h/2, where h is the parameter estimated by kernealHD with the href method. - bw <- spatstat.explore::bw.ppl(OBSppp) |> round() * 2 # sigma = h/2, where h is estimated from adehabitatHR:kernelUD (original code) - npts <- spatstat.geom::npoints(OBSppp) - ### generate KDE layer - density_ppp <- spatstat.explore::density.ppp( - OBSppp, - sigma=bw, - weights = rep(1 / npts, npts), # ensure that intensity integrates to 1 - eps=cell_width) - r <- terra::rast(density_ppp) - terra::crs(r) <- terra::crs(template) + if("mcp" %in% method){ + ver <- adehabitatHR::mcp(xy, percent = method$isopleth) + bg_out <- paste0(outputDir, '/', sp, '_mcp_bg_surface.shp') + } - ### rescale weights. this is time consuming for large rasters. - terra::setMinMax(r, force=TRUE); mm=terra::minmax(r) - r <- (r-mm[1])/(mm[2]-mm[1]) * 100 - r <- round(r) - # set all values that are 0 to be NA - r <- ifel(r==0, NA, r) + if ("mcp" %in% method | tolower(method$surface) == 'binary') { + + WindowList <- list() + for (j in 1:length(ver@polygons[[1]]@Polygons)) { + x <- y <- vector() + x <- ver@polygons[[1]]@Polygons[[j]]@coords[, 1] + y <- ver@polygons[[1]]@Polygons[[j]]@coords[, 2] + xy <- paste(x, y, sep = "") + x <- x[!(duplicated(xy))] + y <- y[!(duplicated(xy))] + + MyWindow <- + try(spatstat.geom::owin(poly = list(x = x[length(x):1], y = y[length(x):1])), silent = + TRUE) + if (class(MyWindow) == "try-error") + MyWindow <- + spatstat.geom::owin(poly = list(x = x[1:length(x)], y = y[1:length(x)])) + ifelse(j == 1, WindowList <- MyWindow, { + ifelse( + spatstat.geom::is.subset.owin(MyWindow, WindowList), + WindowList <- spatstat.geom::setminus.owin(WindowList, MyWindow), + WindowList <- spatstat.geom::union.owin(MyWindow, WindowList) + ) + }) + } + + outVect <- terra::vect(sf::st_as_sf(WindowList)) + crs(outVect) <- terra::crs(template) + outVect <- terra::intersect(outVect, mask) + terra::writeVector(outVect, bg_out, overwrite = T) + } - ### create new template that is cropped to sampling grid - template_small <- terra::crop(template, r) - ### resample to template. this is time consuming for large rasters. - r <- terra::resample(r, template_small, method="bilinear") - ### crop & mask to template - r <- terra::mask(r, template_small) + } # end function + + +# Background point (psuedo-absence) generation function ------------------------- + +# library(data.table) + + +backgroundPointGeneration <- function(sp, # species name + n, # number of pseudoabsence points to generate + method, # c('kde','mcp') + outputDir, # output directory + target_file, # path to csv with growth form-specific points (target background point pool) + overwrite) # overwrite existing files + { # start function + + if('kde' %in% method){ - # ### write out raster - # # global(!is.na(r), "sum", na.rm=TRUE) - # terra::writeRaster(x = r, - # filename = kde_bg_out, - # overwrite=TRUE, - # gdal=c('COMPRESS=LZW', 'BIGTIFF=YES', 'TILED=YES')) - ### get cells that are observations - occ_cellIDs <- terra::extract(r, vect(occ_sf), cells = TRUE) + kde_pts_out <- paste0(outputDir, '/', sp, '_kde_bg_pts.csv') + + if(file.exists(kde_pts_out)){ + stop("KDE background points already exist!\nDelete current points to proceed.") + } - ### IF CONTINOUS, SAMPLE WITH WEIGHTS if(tolower(method$surface) == "continuous"){ - pts <- data.table::data.table() - system.time({ - while(nrow(pts) < n){ - ### take spatially random sample all cells in OCC region - rnd_sample <- terra::spatSample( - x=r, - size=n*2, - method='random', - cells=TRUE, - xy=FALSE, - replace=TRUE, - as.raster=FALSE, - as.points=FALSE, - na.rm=TRUE) |> data.table::data.table() - names(rnd_sample)[1] = "cell" - names(rnd_sample)[2] = "weights" + ### Find KDE raster + r <- list.files(path = outputDir, pattern = 'kde.*.tif$', full.names = T) + r <- terra::rast(r) + + valid <- data.table::data.table() + + # you can extract many values, very quickly with terra + inc <- 10000000 + total <- terra::ncell(r) + + # since we're not ever going to put a point on the kde surface where values + # are < 1, surfaces can be subset to the *truly* available values. For + # species with very limited distributions, this dramatically decreases the + # amount of time the code spends searching for valid bg locations. + + if(total > inc){ + + cat('Finding viable cells...\n') + + pb <- txtProgressBar(0, 100, style = 3) + + for(i in seq(1,ncell(r),inc)){ - ### drop cells that are occurrence - rnd_sample[rnd_sample$cell%in%occ_cellIDs$cell, "weights"] <- 0 + tmp <- terra::extract(r, i:(i + inc)) + tmp$id <- c(i:(i + inc)) + tmp$kde_bg_surface <- ifelse(tmp[ ,1] < 1, NA, tmp[ ,1]) + tmp <- na.omit(tmp) - ### sample the sample with weights - selected_cells <- sample( - rnd_sample$cell, - size = 1000, - replace = TRUE, - prob = rnd_sample$weights - ) + valid <- rbind(valid, tmp) + + perc_complete <- ((i + inc) / total) * 100 + + setTxtProgressBar(pb, ifelse(perc_complete <= 100, perc_complete, 100)) - ### combine sample set with complete set, drop duplicates if any - pts <- rbind(pts, data.table::as.data.table(xyFromCell(r, selected_cells))) - pts <- unique(pts) - pts <- pts[complete.cases(pts), ] } - }) - ### reduce pts to size of background points requested. random sample is all that's needed here. not sampling by prob. Just thinning to limit size - if(nrow(pts) > n){ - pts_ind <- sample(nrow(pts), size=n, replace=FALSE) - pts <- pts[pts_ind,] + + close(pb) + + } else { + + tmp <- terra::extract(r, 1:inc) + tmp$id <- 1:inc + tmp$kde_bg_surface <- ifelse(tmp[ ,1] < 1, NA, tmp[ ,1]) + tmp <- na.omit(tmp) + + valid <- rbind(valid, tmp) + } - ### write out - names(pts) <- c("X", "Y") - pts$Response <- -9998 - data.table::fwrite(pts, kde_pts_out) - # occ_sf <- sf::st_as_sf( - # pts[, c("X", "Y")], - # coords = c("X", "Y"), - # crs = crs(template), - # remove = FALSE - # ) |> terra::vect() - # terra::writeVector(occ_sf, bg_pts_out, overwrite = T) - - } # end tolower(method$surface) == "continuous" - - ### if BINARY, SAMPLE WITH OUT WEIGHTS - if(tolower(method$surface) == 'binary'){ - ### set outnames - # Normalize to create a probability surface - total_sum <- terra::global(r, "sum", na.rm = TRUE)[1,1] - f <- r / total_sum - # Get values and cell numbers for non-NA cells only - v <- terra::values(f, mat = FALSE) - non_na_cells <- which(!is.na(v)) - vals <- v[non_na_cells]; rm(v); gc() - # Get coordinates of non-NA cells using cell numbers - coords <- terra::xyFromCell(f, non_na_cells); rm(non_na_cells); gc() - # Sort values and coords by decreasing probability - ord <- order(vals, decreasing = TRUE) - vals_sorted <- vals[ord]; rm(vals); gc() - # coords_sorted <- coords[ord, ]; rm(coords); gc() - # Cumulative sum to find 95% threshold - cum_prob <- cumsum(vals_sorted) - isopleth <- method$isopleth/100 - idx <- which(cum_prob >= isopleth)[1] - threshold <- vals_sorted[idx]; rm(idx, vals_sorted); gc() - # Create binary mask of cells >= threshold - outRast <- terra::ifel(r >= threshold, 1, NA) - ### mask to template to get holes - outRast <- mask(outRast, template_small) - # ### write out raster - # terra::writeRaster(x <- outRast, - # filename = kde_bg_out, - # overwrite=TRUE, - # gdal=c('COMPRESS=LZW', 'BIGTIFF=YES', 'TILED=YES')) - # plot(template) - # plot(outVect, add=TRUE) - # plot(pts, add=TRUE) - - ### get CELL IDs of observations - occ_cellIDs <- terra::extract(outRast, vect(occ_sf), cells = TRUE) - - ### RANDOMLY SAMPLE RASTER WITH NO WEIGHTS - ### then remove occ locations & dups and sample again. pts <- data.table::data.table() + used <- data.table::data.table() + while(nrow(pts) < n){ - ### take spatially random sample of entire study area - system.time({ - rnd_sample <- terra::spatSample( - outRast, - size=n*2, - method='random', - cells=TRUE, - xy=TRUE, - replace=FALSE, - as.raster=FALSE, - as.points=FALSE, - na.rm=TRUE) |> data.table::data.table() - names(rnd_sample)[1] = "cell" - names(rnd_sample)[2] = "X" - names(rnd_sample)[3] = "Y" - }) # - ### drop cells that are occurrence - rnd_sample <- rnd_sample[!rnd_sample$cell%in%occ_cellIDs$cell, ] + tmp <- valid[sample(nrow(valid), 1000),] + tmp <- tmp[!(tmp$id %in% used$id), ] + + if(nrow(tmp) == 0) next() + + used <- rbind(used, tmp) - ### combine sample set with complete set, drop duplicates if any - pts <- rbind(pts, rnd_sample) - pts <- unique(pts) - pts <- pts[complete.cases(pts), ] + # generate column of random integers between 1 and 100. + # If the id field is >= this number, it is retained. + tmp[, test := as.numeric(sample(1:100, nrow(tmp), replace = T))] + + # keep true values only + tmp <- tmp[, retain := ifelse(tmp[,1] >= test, T, F)][retain == T] + + if(nrow(tmp) > 0){ + pts <- rbind(pts, tmp) + } else { + next() + } } - ### reduce pts to size of background points requested. random sample is all that's needed here. not sampling by prob. Just thinning to limit size + if(nrow(pts) > n){ - pts_ind <- sample(nrow(pts), size=n, replace=FALSE) - pts <- pts[pts_ind,] + pts <- pts[sample(1:nrow(pts), n, prob = pts[[1]], replace = F), ] } - ### write out - pts$Response <- -9998 - data.table::fwrite(pts, kde_pts_out) - # occ_sf <- sf::st_as_sf( - # pts[, c("X", "Y")], - # coords = c("X", "Y"), - # crs = crs(template), - # remove = FALSE - # ) |> terra::vect() - # terra::writeVector(occ_sf, bg_pts_out, overwrite = T) - } # end tolower(method$surface) == 'binary' - } # end 'kde' %in% method - - if("mcp" %in% method){ - ### set outnames - mcp_pts_out <- paste0(outputDir, '/', sp, '_mcp_bg_pts.csv') - bg_pts_out <- paste0(outputDir, '/', sp, '_mcp_bg_pts.shp') - ### don't waste time generating points if they exist - if(file.exists(mcp_pts_out)){ - stop("MCP background points already exist!\nDelete current points to proceed.") + + # convert to spatVector and write out + v <- as.data.frame(terra::xyFromCell(r, pts[[2]])) + v$Response <- c(rep(backgroundValue, n)) + colnames(v) <- c("X", "Y", "Response") + write.csv(v, kde_pts_out, row.names = F) + + cat(nrow(v), 'pseudoabsence points generated for continuous KDE surface\n') } - # get X,Y coords - xy <- fieldDataSheet[,c("X", "Y")] - n_vals <- nrow(xy) - if (n_vals < 3L){ - stop("Need at least 3 points for a polygon.") + if(tolower(method$surface) == "binary"){ + # v <- sf::st_read(list.files(path = outputDir, pattern = 'kde.*shp', full.names = T), quiet = TRUE) + # + # if(missing(target_file)) stop('Missing path to target bg dataset!') + # pts <- readr::read_csv(target_file, show_col_types = F, progress = F) |> + # dplyr::select(1:2) |> + # sf::st_as_sf(coords = c('X','Y'), crs = sf::st_crs(v)) + # + # test <- sf::st_intersects(v, pts, sparse = T) + # pts <- pts[unlist(test),] |> unique() + # + # if(nrow(pts) > n){ + # pts <- pts[sample(nrow(pts), n), ] + # } + # + # pts <- as.data.frame(sf::st_coordinates(pts)) + + v <- vect(list.files(path = outputDir, pattern = 'kde.*shp', full.names = T)) + pts <- crds(spatSample(v, n, "random")) + + pts <- as.data.frame(pts) + names(pts) <- c("X", "Y") + pts$Response <- backgroundValue + write.csv(pts, kde_pts_out, row.names = F) + + cat(nrow(pts), 'pseudoabsence points generated for binary KDE surface\n') } + } + + if('mcp' %in% method){ - # center points to calc isopleth - cx <- mean(xy[, 1]) - cy <- mean(xy[, 2]) - - # Squared distances (no sqrt: avoids extra cost) - d2 <- (xy[, 1] - cx)^2 + (xy[, 2] - cy)^2 - rm(cx, cy) - - # threshold with quantile from isopleth - thr <- stats::quantile(d2, probs = method$isopleth / 100, names = FALSE, type = 7) - keep <- d2 <= thr - rm(d2, thr) - - if (sum(keep) < 3L) stop("Fewer than 3 points remain after trimming.") - - # Convex hull indices on trimmed set - id_trim <- which(keep) - hull_local <- grDevices::chull(xy[keep, 1], xy[keep, 2]) - hull_idx <- id_trim[hull_local] - - # Close polygon ring - poly_coords <- rbind(xy[hull_idx, , drop = FALSE], xy[hull_idx[1], , drop = FALSE]) - - # convert to polygon - polygons <- poly_coords |> - sf::st_as_sf(coords = c("X", "Y"), crs = crs(template)) |> - summarize(geometry = sf::st_combine(geometry)) |> - sf::st_cast("POLYGON") - - ### set path for bg surface - bg_out <- paste0(outputDir, '/', sp, '_mcp_bg_surface.tif') - - ### convert to vect & set crs - outVect <- terra::vect(polygons); rm(polygons) - crs(outVect) <- terra::crs(template) - - ### create new template that is cropped to sampling grid - template_small <- terra::crop(template, outVect) - - ### convert to raster - outRast <- rasterize(outVect, template_small) - - ### mask to template to get holes - outRast <- mask(outRast, template_small) - - # ### write out raster - # terra::writeRaster(x <- outRast, - # filename = bg_out, - # overwrite=TRUE, - # gdal=c('COMPRESS=LZW', 'BIGTIFF=YES', 'TILED=YES')) + mcp_pts_out <- paste0(outputDir, '/', sp, '_mcp_bg_pts.csv') - # plot(template) - # plot(outVect, add=TRUE) - # plot(pts, add=TRUE) + v <- vect(list.files(path = outputDir, pattern = 'mcp.*shp', full.names = T)) + pts <- crds(spatSample(v, n, "random")) - ### get CELL IDs of observations - occ_cellIDs <- terra::extract(outRast, vect(occ_sf), cells = TRUE) + pts <- as.data.frame(pts) + names(pts) <- c("X", "Y") + pts$Response <- backgroundValue + write.csv(pts, mcp_pts_out, row.names = F) - ### RANDOMLY SAMPLE RASTER WITH NO WEIGHTS - ### then remove occ locations & dups and sample again. - pts <- data.table::data.table() - while(nrow(pts) < n){ - ### take spatially random sample of entire study area - system.time({ - rnd_sample <- terra::spatSample( - outRast, - size=n*2, - method='random', - cells=TRUE, - xy=TRUE, - replace=FALSE, - as.raster=FALSE, - as.points=FALSE, - na.rm=TRUE) |> data.table::data.table() - names(rnd_sample)[1] = "cell" - names(rnd_sample)[2] = "X" - names(rnd_sample)[3] = "Y" - }) # - - ### drop out cells that are occurrence - rnd_sample <- rnd_sample[!rnd_sample$cell%in%occ_cellIDs$cell, ] - - ### combine sample set with complete set, drop duplicates if any - pts <- rbind(pts, rnd_sample) - pts <- unique(pts) - pts <- pts[complete.cases(pts), ] - } - ### reduce pts to size of background points requested. random sample is all that's needed here. not sampling by prob. Just thinning to limit size - if(nrow(pts) > n){ - pts_ind <- sample(nrow(pts), size=n, replace=FALSE) - pts <- pts[pts_ind,] - } - ### write out - pts$Response <- -9998 - data.table::fwrite(pts, mcp_pts_out) - # occ_sf <- sf::st_as_sf( - # pts[, c("X", "Y")], - # coords = c("X", "Y"), - # crs = crs(template), - # remove = FALSE - # ) |> terra::vect() - # terra::writeVector(occ_sf, bg_pts_out, overwrite = T) - } # end "mcp" %in% method + cat(nrow(pts), 'pseudoabsence points generated for MCP surface\n') + } -} # end backgroundSurfacePointGeneration - +} # End Function \ No newline at end of file From 3efe23d28771eef29e5f6bfad84be8aec6ed7833 Mon Sep 17 00:00:00 2001 From: spearmangillman Date: Fri, 8 May 2026 11:16:02 -0300 Subject: [PATCH 15/18] add offset so all jobs don't fire at the same time --- src/9-apply-model.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/9-apply-model.R b/src/9-apply-model.R index 4fcc4d1..d88a87b 100644 --- a/src/9-apply-model.R +++ b/src/9-apply-model.R @@ -176,7 +176,8 @@ if (name(myLibrary) == "Partial") { sessionDetails <- setup_session( ssim_temp_dir = ssimTempDir, concurrent_sessions = maxJobs, - total_ram_gb = totalMem + total_ram_gb = totalMem, + desync_max_sec = 10 ) progressBar() From 82e5c8392fd78fdb04d069bf13bd8841213f0401 Mon Sep 17 00:00:00 2001 From: spearmangillman Date: Fri, 8 May 2026 11:19:07 -0300 Subject: [PATCH 16/18] minor documentation updates --- docs/_scenarios/section2.md | 8 ++++---- docs/_scenarios/section3.md | 2 ++ docs/_scenarios/section6.md | 2 ++ 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/_scenarios/section2.md b/docs/_scenarios/section2.md index 8d71b26..c9b131b 100644 --- a/docs/_scenarios/section2.md +++ b/docs/_scenarios/section2.md @@ -121,10 +121,10 @@ Here are some short descriptions of each stage: * Required inputs: Template Raster File, Field Data, Covariate Data. * Outputs: Updated Field Data and Site Data. -4. Background Data Generation: - * Generates background sites (pseudoabsences), extracts covariate data for the pseudoabsence sites and updates the Field Data and Site Data to include pseudoabsence data. - * Required inputs: Template Raster File, Field Data, Covariate Data, Site Data. - * Outputs: Updated Field Data and updated Site Data. +4. Background Data Generation: + * Generates background sites (pseudoabsences), extracts covariate data for the pseudoabsence sites and updates the Field Data and Site Data to include pseudoabsence data. + * Required inputs: Template Raster File, Field Data, Covariate Data, Site Data. + * Outputs: Updated Field Data and updated Site Data. 5. Prepare Training/Testing Data: * Divides Field Data into training and testing sets and/or cross validation folds based on the provided Validation Options arguments. diff --git a/docs/_scenarios/section3.md b/docs/_scenarios/section3.md index 39765e0..e8fc3a9 100644 --- a/docs/_scenarios/section3.md +++ b/docs/_scenarios/section3.md @@ -151,6 +151,8 @@ The **Field Data Options** *Datasheet* can be found under the **Field Data** tab The **Background Data Options** *Datasheet* controls some of the *Scenario's* settings relating to the **Field Data**. +> If the *Background Data Generation* stage is re-run (e.g., when running a dependent scenario or re-running an existing scenario), any existing background sites already present in the **Field Data** will be removed and regenerated based on the current **Background Data Options**. To preserve existing background data instead of regenerating it, set *Generate background sites* to "No". + ### **Generate background sites** Defines whether background sites should be generated for the *Scenario*. Background sites are also referred to as pseudo-absences and represent absences of a species. This information allows the models to compare environmental spaces where a species can and cannot be found. Background sites are often used when true absence data are not available for a species. diff --git a/docs/_scenarios/section6.md b/docs/_scenarios/section6.md index 8fa3862..25d77b5 100644 --- a/docs/_scenarios/section6.md +++ b/docs/_scenarios/section6.md @@ -115,6 +115,8 @@ The **Probability Map** is the main output of the fitted model and shows probabi ### **MESS Map** The **MESS Map** is the Multivariate Environmental Similarity Surface, which represents values as positive, negative, or zero. This map shows how well locations on the **Template Raster** fit into the range of covariate data to which the training data were fit. Positive areas on this map represent areas where the covariate ranges are more similar to those to which the training data of the model were fit. Negative areas on this map represent areas where the covariate ranges are not similar to those to which the training data of the model were fit. Values of zero on this map represent areas where ranges of covariate data at these locations and ranges of covariate data to which the training data of the model were fit are marginally similar [(Elith et al., 2010)](https://doi.org/10.1111/j.2041-210X.2010.00036.x). +> MESS and MoD maps are computed on continuous variables only. If the model includes categorical variables, those variables are excluded from the calculation and a warning is issued. If all model variables are categorical, MESS and MoD maps cannot be generated and will be skipped. + ### **MoD Map** The **MoD Map** is a map of the most dissimilar variable. This map is similar to the **MESS Map** in that it shows regions where covariate ranges were most dissimilar from those used to fit the training data. However, this map shows which covariates used in the model was furthest from the range of the observations used for model training and where. From b5f393a88cc1f2a0711ab24c8575632a2faa0014 Mon Sep 17 00:00:00 2001 From: Skye Pearman-Gillman <41083385+spearmangillman@users.noreply.github.com> Date: Mon, 11 May 2026 15:22:17 -0300 Subject: [PATCH 17/18] Update src/08-fit-model-functions.R coderabbit update Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- src/08-fit-model-functions.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/08-fit-model-functions.R b/src/08-fit-model-functions.R index 2603a62..790d8c1 100644 --- a/src/08-fit-model-functions.R +++ b/src/08-fit-model-functions.R @@ -691,7 +691,13 @@ runMaxent <- function( args <- c(args, "redoifexists", "autorun") - system2("java", args = args) + exitCode <- system2("java", args = args) + if (exitCode != 0) { + stop(paste0( + "MaxEnt execution failed with exit code ", exitCode, + ". Check Java installation and MaxEnt configuration." + )) + } } ### Read Maxent ---------------------------------------------------------------- From 0ccaabd9065e6e4171401938b5c85bda26d67533 Mon Sep 17 00:00:00 2001 From: Skye Pearman-Gillman <41083385+spearmangillman@users.noreply.github.com> Date: Wed, 13 May 2026 14:01:31 -0300 Subject: [PATCH 18/18] Simplify Java version check across platforms Replaced platform-specific Java version check with a unified system2 call. --- src/08-fit-model-functions.R | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/08-fit-model-functions.R b/src/08-fit-model-functions.R index 790d8c1..e71ee46 100644 --- a/src/08-fit-model-functions.R +++ b/src/08-fit-model-functions.R @@ -597,16 +597,7 @@ checkJava <- function() { # run the command and capture exit code status <- tryCatch( { - if (os == "Windows") { - shell( - "java -version", - intern = FALSE, - ignore.stdout = TRUE, - ignore.stderr = TRUE - ) - } else { - system("java -version", ignore.stdout = TRUE, ignore.stderr = TRUE) - } + system2("java", "-version", stdout = FALSE, stderr = FALSE) }, error = function(e) 1L )