From dfc365743e14dcbbcebc1090ec29ad7df4793129 Mon Sep 17 00:00:00 2001 From: Eric Scott Date: Tue, 6 Aug 2024 16:49:21 -0700 Subject: [PATCH 1/8] calculate b_k using table 5 --- R/simpol1.R | 121 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 86 insertions(+), 35 deletions(-) diff --git a/R/simpol1.R b/R/simpol1.R index dbd824f..f09f0a2 100644 --- a/R/simpol1.R +++ b/R/simpol1.R @@ -50,51 +50,55 @@ #' sdf <- ChemmineR::read.SDFset(mol_path) #' fx_groups <- get_fx_groups(sdf) #' simpol1(fx_groups) -simpol1 <- function(fx_groups, meredith = TRUE) { - betas <- fx_groups %>% +simpol1 <- function(fx_groups, temp_k, meredith = TRUE) { + b_k <- calc_bk_simpol1(temp_k) + + contributions <- fx_groups %>% # assume NAs are 0s for the sake of this calculation dplyr::mutate(dplyr::across(dplyr::where(is.integer), function(x) ifelse(is.na(x), 0L, x))) %>% # TODO: There is also a table of coefs and equation to calculate b_k(T) at # different values of T. Why wasn't this implemented? + dplyr::mutate( # multiplier for each functional group is volatility contribution to log10P # b_k(T) * v_k,i - b_00 = (1.79 * 1), #b_0(T) is an intercept/constant. - b_01 = (-0.438 * .data$carbons), - b_02 = (-0.0338 * .data$carbons_asa), - b_03 = (-0.675 * .data$rings_aromatic), - b_04 = (-0.0104 * .data$rings_aliphatic), - b_05 = (-0.105 * .data$carbon_dbl_bonds_aliphatic), - b_06 = (-0.506 * .data$CCCO_aliphatic_ring), - b_07 = (-2.23 * .data$hydroxyl_aliphatic), - b_08 = (-1.35 * .data$aldehydes), - b_09 = (-0.935 * .data$ketones), - b_10 = (-3.58 * .data$carbox_acids), - b_11 = (-1.20 * .data$ester), - b_12 = (-0.718 * .data$ether_alkyl), - b_13 = (-0.683 * .data$ether_alicyclic), - b_14 = (-1.03 * .data$ether_aromatic), - b_15 = (-2.23 * .data$nitrate), - b_16 = (-2.15 * .data$nitro), - b_17 = (-2.14 * .data$hydroxyl_aromatic), - b_18 = (-1.03 * .data$amine_primary), - b_19 = (-0.849 * .data$amine_secondary), - b_20 = (-0.608 * .data$amine_tertiary), - b_21 = (-1.61 * .data$amine_aromatic), - b_22 = (-4.49 * .data$amide_primary), - b_23 = (-5.26 * .data$amide_secondary), - b_24 = (-2.63 * .data$amide_tertiary), - b_25 = (-2.34 * .data$carbonylperoxynitrate), - b_26 = (-0.368 * .data$peroxide), - b_27 = (-2.48 * .data$hydroperoxide), - b_28 = (-2.48 * .data$carbonylperoxyacid), - b_29 = (0.0432 * .data$nitrophenol), - b_30 = (-2.67 * .data$nitroester) + b_00 = (b_k["b0"] * 1), #b_0(T) is an intercept/constant. + b_01 = (b_k["b1"] * .data$carbons), + b_02 = (b_k["b2"] * .data$carbons_asa), + b_03 = (b_k["b3"] * .data$rings_aromatic), + b_04 = (b_k["b4"] * .data$rings_aliphatic), + b_05 = (b_k["b5"] * .data$carbon_dbl_bonds_aliphatic), + b_06 = (b_k["b6"] * .data$CCCO_aliphatic_ring), + b_07 = (b_k["b7"] * .data$hydroxyl_aliphatic), + b_08 = (b_k["b8"] * .data$aldehydes), + b_09 = (b_k["b9"] * .data$ketones), + b_10 = (b_k["b10"] * .data$carbox_acids), + b_11 = (b_k["b11"] * .data$ester), + b_12 = (b_k["b12"] * .data$ether_alkyl), + b_13 = (b_k["b13"] * .data$ether_alicyclic), + b_14 = (b_k["b14"] * .data$ether_aromatic), + b_15 = (b_k["b15"] * .data$nitrate), + b_16 = (b_k["b16"] * .data$nitro), + b_17 = (b_k["b17"] * .data$hydroxyl_aromatic), + b_18 = (b_k["b18"] * .data$amine_primary), + b_19 = (b_k["b19"] * .data$amine_secondary), + b_20 = (b_k["b20"] * .data$amine_tertiary), + b_21 = (b_k["b21"] * .data$amine_aromatic), + b_22 = (b_k["b22"] * .data$amide_primary), + b_23 = (b_k["b23"] * .data$amide_secondary), + b_24 = (b_k["b24"] * .data$amide_tertiary), + b_25 = (b_k["b25"] * .data$carbonylperoxynitrate), + b_26 = (b_k["b26"] * .data$peroxide), + b_27 = (b_k["b27"] * .data$hydroperoxide), + b_28 = (b_k["b28"] * .data$carbonylperoxyacid), + b_29 = (b_k["b29"] * .data$nitrophenol), + b_30 = (b_k["b30"] * .data$nitroester) ) if (isTRUE(meredith)) { - betas <- betas %>% + contributions <- contributions %>% + #TODO: either scale these with temp_k or don't allow use with temp_k other than 293.15 dplyr::mutate( # # Below are additions from Meredith et al. b_32 = (-2.23 * .data$phosphoric_acids), @@ -106,9 +110,56 @@ simpol1 <- function(fx_groups, meredith = TRUE) { ) } - betas %>% + contributions %>% dplyr::rowwise() %>% dplyr::mutate(log10_P = sum(dplyr::c_across(dplyr::starts_with("b_")))) %>% dplyr::ungroup() %>% dplyr::select(-dplyr::starts_with("b_")) +} + + +#from table 5 of Pankow & Asher +calc_bk_simpol1 <- function(temp_k = 293.15) { + table_5 <- tibble::tribble( + ~k, ~B1, ~B2, ~B3, ~B4, + 0, -4.26938E+02, 2.89223E-01, 4.42057E-03, 2.92846E-01, + 1, -4.11248E+02, 8.96919E-01, -2.48607E-03, 1.40312E-01, + 2, -1.46442E+02, 1.54528E+00, 1.71021E-03, -2.78291E-01, + 3, 3.50262E+01, -9.20839E-01, 2.24399E-03, -9.36300E-02, + 4, -8.72770E+01, 1.78059E+00, -3.07187E-03, -1.04341E-01, + 5, 5.73335E+00, 1.69764E-02, -6.28957E-04, 7.55434E-03, + 6, -2.61268E+02, -7.63282E-01, -1.68213E-03, 2.89038E-01, + 7, -7.25373E+02, 8.26326E-01, 2.50957E-03, -2.32304E-01, + 8, -7.29501E+02, 9.86017E-01, -2.92664E-03, 1.78077E-01, + 9, -1.37456E+01, 5.23486E-01, 5.50298E-04, -2.76950E-01, + 10, -7.98796E+02, -1.09436E+00, 5.24132E-03, -2.28040E-01, + 11, -3.93345E+02, -9.51778E-01, -2.19071E-03, 3.05843E-01, + 12, -1.44334E+02, -1.85617E+00, -2.37491E-05, 2.88290E-01, + 13, 4.05265E+01, -2.43780E+00, 3.60133E-03, 9.86422E-02, + 14, -7.07406E+01, -1.06674E+00, 3.73104E-03, -1.44003E-01, + 15, -7.83648E+02, -1.03439E+00, -1.07148E-03, 3.15535E-01, + 16, -5.63872E+02, -7.18416E-01, 2.63016E-03, -4.99470E-02, + 17, -4.53961E+02, -3.26105E-01, -1.39780E-04, -3.93916E-02, + 18, 3.71375E+01, -2.66753E+00, 1.01483E-03, 2.14233E-01, + 19, -5.03710E+02, 1.04092E+00, -4.12746E-03, 1.82790E-01, + 20, -3.59763E+01, -4.08458E-01, 1.67264E-03, -9.98919E-02, + 21, -6.09432E+02, 1.50436E+00, -9.09024E-04, -1.35495E-01, + 22, -1.02367E+02, -7.16253E-01, -2.90670E-04, -5.88556E-01, + 23, -1.93802E+03, 6.48262E-01, 1.73245E-03, 3.47940E-02, + 24, -5.26919E+00, 3.06435E-01, 3.25397E-03, -6.81506E-01, + 25, -2.84042E+02, -6.25424E-01, -8.22474E-04, -8.80549E-02, + 26, 1.50093E+02, 2.39875E-02, -3.37969E-03, 1.52789E-02, + 27, -2.03387E+01, -5.48718E+00, 8.39075E-03, 1.07884E-01, + 28, -8.38064E+02, -1.09600E+00, -4.24385E-04, 2.81812E-01, + 29, -5.27934E+01, -4.63689E-01, -5.11647E-03, 3.84965E-01, + 30, -1.61520E+03, 9.01669E-01, 1.44536E-03, 2.66889E-01 + ) + + b_k_df <- + dplyr::mutate(table_5, b_k = B1/temp_k + B2 + B3*temp_k + B4*log(temp_k)) + + out <- b_k_df$b_k + names(out) <- paste0("b", b_k_df$k) + #return: + out } \ No newline at end of file From 09d2597f728e931c003ac3292264a3933ed3b427 Mon Sep 17 00:00:00 2001 From: Eric Scott Date: Fri, 9 Aug 2024 14:08:19 -0700 Subject: [PATCH 2/8] finish updating simpol1 to allow for specification of temperature --- DESCRIPTION | 2 +- R/simpol1.R | 116 +++++++++++++++++++++++++++---------------------- man/simpol1.Rd | 29 +++++++++---- 3 files changed, 86 insertions(+), 61 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 763a016..865a98d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -46,4 +46,4 @@ biocViews: Config/testthat/edition: 3 Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 diff --git a/R/simpol1.R b/R/simpol1.R index f09f0a2..71b9970 100644 --- a/R/simpol1.R +++ b/R/simpol1.R @@ -12,24 +12,29 @@ #' atmospheres. #' #' The modified method in Meredith et al. (2023) adds the following additional -#' functional groups and coefficients: +#' functional groups: #' -#' - Phosphoric acid (-2.23) -#' - Phosphoric ester (-2.23) -#' - Sulfate (-2.23) -#' - Sulfonate (-2.23) -#' - Thiol (-2.23) -#' - Carbothioester (-1.20) +#' Using the same coefficient as nitrate #' -#' @note The method described in Pankow & Asher (2008) allows for -#' calculations of logP at different temperatures. This implementation -#' currently only calculates values at 20ºC. +#' - Phosphoric acid +#' - Phosphoric ester +#' - Sulfate +#' - Sulfonate +#' - Thiol #' +#' Using the same coefficient as ester +#' - Carbothioester +#' +#' @note The method described in Pankow & Asher (2008) was developed using data +#' between 0ºC and 120ºC, so extrapolating beyond that range is not +#' recommended. +#' #' @param fx_groups A data.frame or tibble with counts of functional groups #' produced by [get_fx_groups()] (or manually, with the same column names). #' @param meredith Logical; `FALSE`: use the original SIMPOL.1 method. `TRUE`: #' use the modified version in Meredith et al. (2023). -#' +#' @param temp_c Numeric; the temperature at which to calculate volatility +#' estimates. #' @returns The `fx_groups` tibble with the additional `log10_P` column. #' #' @references @@ -50,7 +55,13 @@ #' sdf <- ChemmineR::read.SDFset(mol_path) #' fx_groups <- get_fx_groups(sdf) #' simpol1(fx_groups) -simpol1 <- function(fx_groups, temp_k, meredith = TRUE) { +simpol1 <- function(fx_groups, meredith = TRUE, temp_c = 20) { + #convert ºC to K + if (temp_c < 0 | temp_c > 120) { + warning("Temperatures below 0ºC or above 120ºC extrapolate beyond the range SIMPOL.1 was intended for. \n Interpret results with caution!") + } + temp_k <- temp_c + 273.15 + b_k <- calc_bk_simpol1(temp_k) contributions <- fx_groups %>% @@ -63,58 +74,59 @@ simpol1 <- function(fx_groups, temp_k, meredith = TRUE) { dplyr::mutate( # multiplier for each functional group is volatility contribution to log10P # b_k(T) * v_k,i - b_00 = (b_k["b0"] * 1), #b_0(T) is an intercept/constant. - b_01 = (b_k["b1"] * .data$carbons), - b_02 = (b_k["b2"] * .data$carbons_asa), - b_03 = (b_k["b3"] * .data$rings_aromatic), - b_04 = (b_k["b4"] * .data$rings_aliphatic), - b_05 = (b_k["b5"] * .data$carbon_dbl_bonds_aliphatic), - b_06 = (b_k["b6"] * .data$CCCO_aliphatic_ring), - b_07 = (b_k["b7"] * .data$hydroxyl_aliphatic), - b_08 = (b_k["b8"] * .data$aldehydes), - b_09 = (b_k["b9"] * .data$ketones), - b_10 = (b_k["b10"] * .data$carbox_acids), - b_11 = (b_k["b11"] * .data$ester), - b_12 = (b_k["b12"] * .data$ether_alkyl), - b_13 = (b_k["b13"] * .data$ether_alicyclic), - b_14 = (b_k["b14"] * .data$ether_aromatic), - b_15 = (b_k["b15"] * .data$nitrate), - b_16 = (b_k["b16"] * .data$nitro), - b_17 = (b_k["b17"] * .data$hydroxyl_aromatic), - b_18 = (b_k["b18"] * .data$amine_primary), - b_19 = (b_k["b19"] * .data$amine_secondary), - b_20 = (b_k["b20"] * .data$amine_tertiary), - b_21 = (b_k["b21"] * .data$amine_aromatic), - b_22 = (b_k["b22"] * .data$amide_primary), - b_23 = (b_k["b23"] * .data$amide_secondary), - b_24 = (b_k["b24"] * .data$amide_tertiary), - b_25 = (b_k["b25"] * .data$carbonylperoxynitrate), - b_26 = (b_k["b26"] * .data$peroxide), - b_27 = (b_k["b27"] * .data$hydroperoxide), - b_28 = (b_k["b28"] * .data$carbonylperoxyacid), - b_29 = (b_k["b29"] * .data$nitrophenol), - b_30 = (b_k["b30"] * .data$nitroester) + vb_00 = (b_k["b0"] * 1), #b_0(T) is an intercept/constant. + vb_01 = (b_k["b1"] * .data$carbons), + vb_02 = (b_k["b2"] * .data$carbons_asa), + vb_03 = (b_k["b3"] * .data$rings_aromatic), + vb_04 = (b_k["b4"] * .data$rings_aliphatic), + vb_05 = (b_k["b5"] * .data$carbon_dbl_bonds_aliphatic), + vb_06 = (b_k["b6"] * .data$CCCO_aliphatic_ring), + vb_07 = (b_k["b7"] * .data$hydroxyl_aliphatic), + vb_08 = (b_k["b8"] * .data$aldehydes), + vb_09 = (b_k["b9"] * .data$ketones), + vb_10 = (b_k["b10"] * .data$carbox_acids), + vb_11 = (b_k["b11"] * .data$ester), + vb_12 = (b_k["b12"] * .data$ether_alkyl), + vb_13 = (b_k["b13"] * .data$ether_alicyclic), + vb_14 = (b_k["b14"] * .data$ether_aromatic), + vb_15 = (b_k["b15"] * .data$nitrate), + vb_16 = (b_k["b16"] * .data$nitro), + vb_17 = (b_k["b17"] * .data$hydroxyl_aromatic), + vb_18 = (b_k["b18"] * .data$amine_primary), + vb_19 = (b_k["b19"] * .data$amine_secondary), + vb_20 = (b_k["b20"] * .data$amine_tertiary), + vb_21 = (b_k["b21"] * .data$amine_aromatic), + vb_22 = (b_k["b22"] * .data$amide_primary), + vb_23 = (b_k["b23"] * .data$amide_secondary), + vb_24 = (b_k["b24"] * .data$amide_tertiary), + vb_25 = (b_k["b25"] * .data$carbonylperoxynitrate), + vb_26 = (b_k["b26"] * .data$peroxide), + vb_27 = (b_k["b27"] * .data$hydroperoxide), + vb_28 = (b_k["b28"] * .data$carbonylperoxyacid), + vb_29 = (b_k["b29"] * .data$nitrophenol), + vb_30 = (b_k["b30"] * .data$nitroester) ) if (isTRUE(meredith)) { contributions <- contributions %>% - #TODO: either scale these with temp_k or don't allow use with temp_k other than 293.15 dplyr::mutate( # # Below are additions from Meredith et al. - b_32 = (-2.23 * .data$phosphoric_acids), - b_33 = (-2.23 * .data$phosphoric_esters), - b_34 = (-2.23 * .data$sulfates), - b_35 = (-2.23 * .data$sulfonates), - b_36 = (-2.23 * .data$thiols), - b_37 = (-1.20 * .data$carbothioesters) + # use the coef for nitrate for the following groups + vb_32 = (b_k["b15"] * .data$phosphoric_acids), + vb_33 = (b_k["b15"] * .data$phosphoric_esters), + vb_34 = (b_k["b15"] * .data$sulfates), + vb_35 = (b_k["b15"] * .data$sulfonates), + vb_36 = (b_k["b15"] * .data$thiols), + # use the coef for ester for carbothioesters + vb_37 = (b_k["b11"] * .data$carbothioesters) ) } contributions %>% dplyr::rowwise() %>% - dplyr::mutate(log10_P = sum(dplyr::c_across(dplyr::starts_with("b_")))) %>% + dplyr::mutate(log10_P = sum(dplyr::c_across(dplyr::starts_with("vb_")))) %>% dplyr::ungroup() %>% - dplyr::select(-dplyr::starts_with("b_")) + dplyr::select(-dplyr::starts_with("vb_")) } diff --git a/man/simpol1.Rd b/man/simpol1.Rd index 1e0f461..c028cff 100644 --- a/man/simpol1.Rd +++ b/man/simpol1.Rd @@ -4,7 +4,7 @@ \alias{simpol1} \title{SIMPOL.1 method for calculating estimated volatility} \usage{ -simpol1(fx_groups, meredith = TRUE) +simpol1(fx_groups, meredith = TRUE, temp_c = 20) } \arguments{ \item{fx_groups}{A data.frame or tibble with counts of functional groups @@ -12,6 +12,13 @@ produced by \code{\link[=get_fx_groups]{get_fx_groups()}} (or manually, with the \item{meredith}{Logical; \code{FALSE}: use the original SIMPOL.1 method. \code{TRUE}: use the modified version in Meredith et al. (2023).} + +\item{temp_c}{Numeric; the temperature at which to calculate volatility +estimates. Pankow & Asher (2008) only used values between 0 and 120ºC so a +warning is printed if you supply a value outside of this range. Also note +that Pankow & Asher (2008) reported decreases in the accuracy of the +SIMPOL.1 method with temperatures at the lower end of this range (see Fig. +10 in their paper).} } \value{ The \code{fx_groups} tibble with the additional \code{log10_P} column. @@ -30,14 +37,20 @@ coefficients for each functional group (\eqn{b_K(T)}). Units are in log10 atmospheres. The modified method in Meredith et al. (2023) adds the following additional -functional groups and coefficients: +functional groups: + +Using the same coefficient as nitrate +\itemize{ +\item Phosphoric acid +\item Phosphoric ester +\item Sulfate +\item Sulfonate +\item Thiol +} + +Using the same coefficient as ester \itemize{ -\item Phosphoric acid (-2.23) -\item Phosphoric ester (-2.23) -\item Sulfate (-2.23) -\item Sulfonate (-2.23) -\item Thiol (-2.23) -\item Carbothioester (-1.20) +\item Carbothioester } } \note{ From c5aa4524bab4e658013371c4495521f6605209d9 Mon Sep 17 00:00:00 2001 From: Eric Scott Date: Fri, 9 Aug 2024 15:30:57 -0700 Subject: [PATCH 3/8] increased precision changed results slightly --- tests/testthat/test-calc_vol.R | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test-calc_vol.R b/tests/testthat/test-calc_vol.R index 7d41806..7d1cb53 100644 --- a/tests/testthat/test-calc_vol.R +++ b/tests/testthat/test-calc_vol.R @@ -1,6 +1,6 @@ test_that("volatility estimate is correct", { - ex_vol_df <- calc_vol("data/C16181.mol") - expect_equal(round(ex_vol_df$rvi, 6), 6.975349) + ex_vol_df <- calc_vol(testthat::test_path("data/C16181.mol"), temp_c = 20) + expect_equal(round(ex_vol_df$rvi, 4), 6.9776) }) test_that("returns correct columns depending on return arguments", { @@ -28,11 +28,15 @@ test_that("calc_vol() works with multiple inputs", { test_that("smiles and .mol give same results", { paths <- - c("data/C16181.mol", - "data/map00361/C00011.mol", - "data/map00361/C00042.mol") + c(testthat::test_path("data/C16181.mol"), + testthat::test_path("data/map00361/C00011.mol"), + testthat::test_path("data/map00361/C00042.mol")) smiles <- - c("C1(C(C(C(C(C1Cl)Cl)Cl)Cl)Cl)O", "O=C=O", "C(CC(=O)O)C(=O)O") + c( + "beta-2,3,4,5,6-Pentachlorocyclohexanol" = "C1(C(C(C(C(C1Cl)Cl)Cl)Cl)Cl)O", + "CO2" = "O=C=O", + "Succinate" = "C(CC(=O)O)C(=O)O" + ) expect_equal( calc_vol(smiles, from = "smiles") %>% dplyr::select(-name,-smiles), calc_vol(paths) %>% dplyr::select(-name,-mol_path) From cb72d88c6923427830d6bf4b78915c53a35acdbe Mon Sep 17 00:00:00 2001 From: Eric Scott Date: Fri, 9 Aug 2024 15:31:27 -0700 Subject: [PATCH 4/8] add temperature arg and fix the way named vectors of smiles works --- NEWS.md | 4 +++- R/calc_vol.R | 8 ++++++-- R/simpol1.R | 2 +- man/calc_vol.Rd | 4 ++++ man/simpol1.Rd | 12 ++++-------- 5 files changed, 18 insertions(+), 12 deletions(-) diff --git a/NEWS.md b/NEWS.md index d5fce7b..f0d15ce 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,8 @@ # volcalc (development version) -* adds a `validate = TRUE` option to `calc_vol()` and `get_fx_groups()` that returns `NA`s when there are suspected errors in parsing SMILES or .mol files. This is unfortunately not available on Windows due to differences in the windows version of `ChemmineOB` +* Added a `validate = TRUE` option to `calc_vol()` and `get_fx_groups()` that returns `NA`s when there are suspected errors in parsing SMILES or .mol files. This is unfortunately not available on Windows due to differences in the windows version of `ChemmineOB` +* Users can now supply a temperature at which to calculate volatility estimates. +* Inputs to `calc_vol()` supplied as a named vector of SMILES strings now properly pass the names to `ChemmineR::smile2sdf()` where they are used as the molecule name. # volcalc 2.1.2 diff --git a/R/calc_vol.R b/R/calc_vol.R index 5778cc6..50beca3 100644 --- a/R/calc_vol.R +++ b/R/calc_vol.R @@ -7,6 +7,8 @@ #' @param from The form of `input`. Either `"mol_path"` (default) or `"smiles"`. #' @param method The method for calculating estimated volatility. See #' [simpol1()] for more details. +#' @param temp_c numeric; For methods that allow it, specify temperature (in +#' degrees C) at which log10(P) estimates are calculated. #' @param environment The environment for calculating relative volatility #' categories. RVI thresholds for low, moderate, and high volatility are as #' follows: `"clean"` (clean atmosphere, default) -2, 0, 2; `"polluted"` @@ -59,6 +61,7 @@ calc_vol <- function(input, from = c("mol_path", "smiles"), method = c("meredith", "simpol1"), + temp_c = 20, environment = c("clean", "polluted", "soil"), validate = TRUE, return_fx_groups = FALSE, @@ -88,6 +91,8 @@ calc_vol <- } if(from == "smiles") { + # a way of converting into a list of *named* vectors, so smiles2sdf() runs on named vectors as input + input <- split(input, seq_along(input)) compound_sdf_list <- lapply(input, ChemmineR::smiles2sdf) } fx_groups_df_list <- @@ -98,7 +103,7 @@ calc_vol <- dplyr::bind_rows(fx_groups_df_list, .id = {{ from }}) # calculate relative volatility & categories from logP - vol_df <- simpol1(fx_groups_df, meredith = meredith) %>% + vol_df <- simpol1(fx_groups_df, meredith = meredith, temp_c = temp_c) %>% dplyr::mutate( # mass is converted from grams to micrograms # 0.0000821 is universal gas constant @@ -121,7 +126,6 @@ calc_vol <- colnames(fx_groups_df)[!colnames(fx_groups_df) %in% c("formula", "name", "molecular_weight")] } if (isTRUE(return_calc_steps)) { - #TODO document log_alpha (need to figure out why it's called that first) cols_calc <- c("molecular_weight", "log_alpha", "log10_P") } diff --git a/R/simpol1.R b/R/simpol1.R index 71b9970..4814b80 100644 --- a/R/simpol1.R +++ b/R/simpol1.R @@ -34,7 +34,7 @@ #' @param meredith Logical; `FALSE`: use the original SIMPOL.1 method. `TRUE`: #' use the modified version in Meredith et al. (2023). #' @param temp_c Numeric; the temperature at which to calculate volatility -#' estimates. +#' estimates in degrees C. #' @returns The `fx_groups` tibble with the additional `log10_P` column. #' #' @references diff --git a/man/calc_vol.Rd b/man/calc_vol.Rd index 5e775db..5ac425a 100644 --- a/man/calc_vol.Rd +++ b/man/calc_vol.Rd @@ -8,6 +8,7 @@ calc_vol( input, from = c("mol_path", "smiles"), method = c("meredith", "simpol1"), + temp_c = 20, environment = c("clean", "polluted", "soil"), validate = TRUE, return_fx_groups = FALSE, @@ -22,6 +23,9 @@ calc_vol( \item{method}{The method for calculating estimated volatility. See \code{\link[=simpol1]{simpol1()}} for more details.} +\item{temp_c}{numeric; For methods that allow it, specify temperature (in +degrees C) at which log10(P) estimates are calculated.} + \item{environment}{The environment for calculating relative volatility categories. RVI thresholds for low, moderate, and high volatility are as follows: \code{"clean"} (clean atmosphere, default) -2, 0, 2; \code{"polluted"} diff --git a/man/simpol1.Rd b/man/simpol1.Rd index c028cff..c6c4693 100644 --- a/man/simpol1.Rd +++ b/man/simpol1.Rd @@ -14,11 +14,7 @@ produced by \code{\link[=get_fx_groups]{get_fx_groups()}} (or manually, with the use the modified version in Meredith et al. (2023).} \item{temp_c}{Numeric; the temperature at which to calculate volatility -estimates. Pankow & Asher (2008) only used values between 0 and 120ºC so a -warning is printed if you supply a value outside of this range. Also note -that Pankow & Asher (2008) reported decreases in the accuracy of the -SIMPOL.1 method with temperatures at the lower end of this range (see Fig. -10 in their paper).} +estimates in degrees C.} } \value{ The \code{fx_groups} tibble with the additional \code{log10_P} column. @@ -54,9 +50,9 @@ Using the same coefficient as ester } } \note{ -The method described in Pankow & Asher (2008) allows for -calculations of logP at different temperatures. This implementation -currently only calculates values at 20ºC. +The method described in Pankow & Asher (2008) was developed using data +between 0ºC and 120ºC, so extrapolating beyond that range is not +recommended. } \examples{ mol_path <- mol_example()[3] From 050cedd154074d6c7e1322097e69944ad4c541f2 Mon Sep 17 00:00:00 2001 From: Eric Scott Date: Fri, 9 Aug 2024 15:51:20 -0700 Subject: [PATCH 5/8] satisfy check() demands --- R/simpol1.R | 10 +++++----- man/simpol1.Rd | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/R/simpol1.R b/R/simpol1.R index 4814b80..c546454 100644 --- a/R/simpol1.R +++ b/R/simpol1.R @@ -26,7 +26,7 @@ #' - Carbothioester #' #' @note The method described in Pankow & Asher (2008) was developed using data -#' between 0ºC and 120ºC, so extrapolating beyond that range is not +#' between 0 °C and 120 °C, so extrapolating beyond that range is not #' recommended. #' #' @param fx_groups A data.frame or tibble with counts of functional groups @@ -34,7 +34,7 @@ #' @param meredith Logical; `FALSE`: use the original SIMPOL.1 method. `TRUE`: #' use the modified version in Meredith et al. (2023). #' @param temp_c Numeric; the temperature at which to calculate volatility -#' estimates in degrees C. +#' estimates in °C. #' @returns The `fx_groups` tibble with the additional `log10_P` column. #' #' @references @@ -56,9 +56,9 @@ #' fx_groups <- get_fx_groups(sdf) #' simpol1(fx_groups) simpol1 <- function(fx_groups, meredith = TRUE, temp_c = 20) { - #convert ºC to K + #convert C to K if (temp_c < 0 | temp_c > 120) { - warning("Temperatures below 0ºC or above 120ºC extrapolate beyond the range SIMPOL.1 was intended for. \n Interpret results with caution!") + warning("Temperatures below 0\u00b0C or above 120\u00b0C extrapolate beyond the range SIMPOL.1 was intended for. \n Interpret results with caution!") } temp_k <- temp_c + 273.15 @@ -168,7 +168,7 @@ calc_bk_simpol1 <- function(temp_k = 293.15) { ) b_k_df <- - dplyr::mutate(table_5, b_k = B1/temp_k + B2 + B3*temp_k + B4*log(temp_k)) + dplyr::mutate(table_5, b_k = .data$B1/temp_k + .data$B2 + .data$B3*temp_k + .data$B4*log(temp_k)) out <- b_k_df$b_k names(out) <- paste0("b", b_k_df$k) diff --git a/man/simpol1.Rd b/man/simpol1.Rd index c6c4693..b85897b 100644 --- a/man/simpol1.Rd +++ b/man/simpol1.Rd @@ -14,7 +14,7 @@ produced by \code{\link[=get_fx_groups]{get_fx_groups()}} (or manually, with the use the modified version in Meredith et al. (2023).} \item{temp_c}{Numeric; the temperature at which to calculate volatility -estimates in degrees C.} +estimates in °C.} } \value{ The \code{fx_groups} tibble with the additional \code{log10_P} column. @@ -51,7 +51,7 @@ Using the same coefficient as ester } \note{ The method described in Pankow & Asher (2008) was developed using data -between 0ºC and 120ºC, so extrapolating beyond that range is not +between 0 °C and 120 °C, so extrapolating beyond that range is not recommended. } \examples{ From ed334fc878b35aea3799a1594400bbcaeb05f348 Mon Sep 17 00:00:00 2001 From: Eric Scott Date: Thu, 15 Aug 2024 16:25:00 -0700 Subject: [PATCH 6/8] add simple test for temp_c arg --- tests/testthat/test-simpol1.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/testthat/test-simpol1.R b/tests/testthat/test-simpol1.R index 86ba1e1..ec6f1ec 100644 --- a/tests/testthat/test-simpol1.R +++ b/tests/testthat/test-simpol1.R @@ -15,3 +15,11 @@ test_that("isoprene is more volatile than glucose", { expect_gt(log10P_together[1], log10P_together[2]) expect_equal(log10P_together, log10P_separate) }) + +test_that("changing temperature does something", { + isoprene <- ChemmineR::read.SDFset(mol_example()[5]) + groups <- get_fx_groups(isoprene) + c_30 <- simpol1(groups, temp_c = 30)$log10_P + c_20 <- simpol1(groups, temp_c = 20)$log10_P + expect_gt(c_30, c_20) +}) From 99fe477be15deca3ad5116984e41884e952fca30 Mon Sep 17 00:00:00 2001 From: Eric Scott Date: Mon, 9 Dec 2024 11:43:09 -0700 Subject: [PATCH 7/8] alert users about slight change in numbers --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index f0d15ce..2d7bf51 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,7 @@ # volcalc (development version) * Added a `validate = TRUE` option to `calc_vol()` and `get_fx_groups()` that returns `NA`s when there are suspected errors in parsing SMILES or .mol files. This is unfortunately not available on Windows due to differences in the windows version of `ChemmineOB` -* Users can now supply a temperature at which to calculate volatility estimates. +* Users can now supply a temperature at which to calculate volatility estimates. As a result, RVI values may now be *slightly* different compared to those from `volcalc` v2.1.2 due to rounding differences. * Inputs to `calc_vol()` supplied as a named vector of SMILES strings now properly pass the names to `ChemmineR::smile2sdf()` where they are used as the molecule name. # volcalc 2.1.2 From e101e9274259edfb22219ca2582b748dd5e2831a Mon Sep 17 00:00:00 2001 From: Eric Scott Date: Mon, 9 Dec 2024 11:44:47 -0700 Subject: [PATCH 8/8] indentation --- R/simpol1.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/simpol1.R b/R/simpol1.R index c546454..16272f5 100644 --- a/R/simpol1.R +++ b/R/simpol1.R @@ -77,7 +77,7 @@ simpol1 <- function(fx_groups, meredith = TRUE, temp_c = 20) { vb_00 = (b_k["b0"] * 1), #b_0(T) is an intercept/constant. vb_01 = (b_k["b1"] * .data$carbons), vb_02 = (b_k["b2"] * .data$carbons_asa), - vb_03 = (b_k["b3"] * .data$rings_aromatic), + vb_03 = (b_k["b3"] * .data$rings_aromatic), vb_04 = (b_k["b4"] * .data$rings_aliphatic), vb_05 = (b_k["b5"] * .data$carbon_dbl_bonds_aliphatic), vb_06 = (b_k["b6"] * .data$CCCO_aliphatic_ring),