From dfc365743e14dcbbcebc1090ec29ad7df4793129 Mon Sep 17 00:00:00 2001
From: Eric Scott <scottericr@gmail.com>
Date: Tue, 6 Aug 2024 16:49:21 -0700
Subject: [PATCH 1/8] calculate b_k using table 5

---
 R/simpol1.R | 121 +++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 86 insertions(+), 35 deletions(-)

diff --git a/R/simpol1.R b/R/simpol1.R
index dbd824f..f09f0a2 100644
--- a/R/simpol1.R
+++ b/R/simpol1.R
@@ -50,51 +50,55 @@
 #' sdf <- ChemmineR::read.SDFset(mol_path)
 #' fx_groups <- get_fx_groups(sdf)
 #' simpol1(fx_groups)
-simpol1 <- function(fx_groups, meredith = TRUE) {
-  betas <- fx_groups %>%
+simpol1 <- function(fx_groups, temp_k, meredith = TRUE) {
+  b_k <- calc_bk_simpol1(temp_k)
+  
+  contributions <- fx_groups %>%
     # assume NAs are 0s for the sake of this calculation
     dplyr::mutate(dplyr::across(dplyr::where(is.integer), function(x)
       ifelse(is.na(x), 0L, x))) %>%
     # TODO: There is also a table of coefs and equation to calculate b_k(T) at
     # different values of T. Why wasn't this implemented?
+    
     dplyr::mutate(
       # multiplier for each functional group is volatility contribution to log10P
       # b_k(T)  * v_k,i
-      b_00 = (1.79     * 1), #b_0(T) is an intercept/constant.  
-      b_01 = (-0.438   * .data$carbons),
-      b_02 = (-0.0338  * .data$carbons_asa),
-      b_03 = (-0.675	 * .data$rings_aromatic),
-      b_04 = (-0.0104  * .data$rings_aliphatic),
-      b_05 = (-0.105   * .data$carbon_dbl_bonds_aliphatic),
-      b_06 = (-0.506   * .data$CCCO_aliphatic_ring),
-      b_07 = (-2.23	   * .data$hydroxyl_aliphatic),
-      b_08 = (-1.35	   * .data$aldehydes),
-      b_09 = (-0.935   * .data$ketones),
-      b_10 = (-3.58	   * .data$carbox_acids),
-      b_11 = (-1.20	   * .data$ester),
-      b_12 = (-0.718   * .data$ether_alkyl),
-      b_13 = (-0.683   * .data$ether_alicyclic),
-      b_14 = (-1.03	   * .data$ether_aromatic),
-      b_15 = (-2.23	   * .data$nitrate),
-      b_16 = (-2.15	   * .data$nitro),
-      b_17 = (-2.14	   * .data$hydroxyl_aromatic),
-      b_18 = (-1.03	   * .data$amine_primary),
-      b_19 = (-0.849	 * .data$amine_secondary),
-      b_20 = (-0.608 	 * .data$amine_tertiary),
-      b_21 = (-1.61    * .data$amine_aromatic),
-      b_22 = (-4.49    * .data$amide_primary),
-      b_23 = (-5.26    * .data$amide_secondary),
-      b_24 = (-2.63    * .data$amide_tertiary),
-      b_25 = (-2.34    * .data$carbonylperoxynitrate),
-      b_26 = (-0.368   * .data$peroxide),
-      b_27 = (-2.48	   * .data$hydroperoxide),
-      b_28 = (-2.48    * .data$carbonylperoxyacid),
-      b_29 = (0.0432 	 * .data$nitrophenol),
-      b_30 = (-2.67	   * .data$nitroester)
+      b_00 = (b_k["b0"]  * 1), #b_0(T) is an intercept/constant.  
+      b_01 = (b_k["b1"]  * .data$carbons),
+      b_02 = (b_k["b2"]  * .data$carbons_asa),
+      b_03 = (b_k["b3"]	 * .data$rings_aromatic),
+      b_04 = (b_k["b4"]  * .data$rings_aliphatic),
+      b_05 = (b_k["b5"]  * .data$carbon_dbl_bonds_aliphatic),
+      b_06 = (b_k["b6"]  * .data$CCCO_aliphatic_ring),
+      b_07 = (b_k["b7"]  * .data$hydroxyl_aliphatic),
+      b_08 = (b_k["b8"]  * .data$aldehydes),
+      b_09 = (b_k["b9"]  * .data$ketones),
+      b_10 = (b_k["b10"]  * .data$carbox_acids),
+      b_11 = (b_k["b11"]  * .data$ester),
+      b_12 = (b_k["b12"]  * .data$ether_alkyl),
+      b_13 = (b_k["b13"]  * .data$ether_alicyclic),
+      b_14 = (b_k["b14"]  * .data$ether_aromatic),
+      b_15 = (b_k["b15"]  * .data$nitrate),
+      b_16 = (b_k["b16"]  * .data$nitro),
+      b_17 = (b_k["b17"]  * .data$hydroxyl_aromatic),
+      b_18 = (b_k["b18"]  * .data$amine_primary),
+      b_19 = (b_k["b19"]	 * .data$amine_secondary),
+      b_20 = (b_k["b20"]	 * .data$amine_tertiary),
+      b_21 = (b_k["b21"]  * .data$amine_aromatic),
+      b_22 = (b_k["b22"]  * .data$amide_primary),
+      b_23 = (b_k["b23"]  * .data$amide_secondary),
+      b_24 = (b_k["b24"]  * .data$amide_tertiary),
+      b_25 = (b_k["b25"]  * .data$carbonylperoxynitrate),
+      b_26 = (b_k["b26"]  * .data$peroxide),
+      b_27 = (b_k["b27"]  * .data$hydroperoxide),
+      b_28 = (b_k["b28"]  * .data$carbonylperoxyacid),
+      b_29 = (b_k["b29"]	 * .data$nitrophenol),
+      b_30 = (b_k["b30"]  * .data$nitroester)
     ) 
   
   if (isTRUE(meredith)) {
-    betas <- betas %>% 
+    contributions <- contributions %>% 
+      #TODO: either scale these with temp_k or don't allow use with temp_k other than 293.15
       dplyr::mutate(
         # # Below are additions from Meredith et al.
         b_32 = (-2.23	  * .data$phosphoric_acids),
@@ -106,9 +110,56 @@ simpol1 <- function(fx_groups, meredith = TRUE) {
       )
   }
   
-  betas %>% 
+  contributions %>% 
     dplyr::rowwise() %>% 
     dplyr::mutate(log10_P = sum(dplyr::c_across(dplyr::starts_with("b_")))) %>% 
     dplyr::ungroup() %>% 
     dplyr::select(-dplyr::starts_with("b_"))
+}
+
+
+#from table 5 of Pankow & Asher
+calc_bk_simpol1 <- function(temp_k = 293.15) {
+  table_5 <- tibble::tribble(
+    ~k, ~B1, ~B2, ~B3, ~B4,
+    0, -4.26938E+02, 2.89223E-01, 4.42057E-03, 2.92846E-01,
+    1, -4.11248E+02, 8.96919E-01, -2.48607E-03, 1.40312E-01,
+    2, -1.46442E+02, 1.54528E+00, 1.71021E-03, -2.78291E-01,
+    3, 3.50262E+01, -9.20839E-01, 2.24399E-03, -9.36300E-02,
+    4, -8.72770E+01, 1.78059E+00, -3.07187E-03, -1.04341E-01,
+    5, 5.73335E+00, 1.69764E-02, -6.28957E-04, 7.55434E-03,
+    6, -2.61268E+02, -7.63282E-01, -1.68213E-03, 2.89038E-01,
+    7, -7.25373E+02, 8.26326E-01, 2.50957E-03, -2.32304E-01,
+    8, -7.29501E+02, 9.86017E-01, -2.92664E-03, 1.78077E-01,
+    9, -1.37456E+01, 5.23486E-01, 5.50298E-04, -2.76950E-01,
+    10, -7.98796E+02, -1.09436E+00, 5.24132E-03, -2.28040E-01,
+    11, -3.93345E+02, -9.51778E-01, -2.19071E-03, 3.05843E-01,
+    12, -1.44334E+02, -1.85617E+00, -2.37491E-05, 2.88290E-01,
+    13, 4.05265E+01, -2.43780E+00, 3.60133E-03, 9.86422E-02,
+    14, -7.07406E+01, -1.06674E+00, 3.73104E-03, -1.44003E-01,
+    15, -7.83648E+02, -1.03439E+00, -1.07148E-03, 3.15535E-01,
+    16, -5.63872E+02, -7.18416E-01, 2.63016E-03, -4.99470E-02,
+    17, -4.53961E+02, -3.26105E-01, -1.39780E-04, -3.93916E-02,
+    18, 3.71375E+01, -2.66753E+00, 1.01483E-03, 2.14233E-01,
+    19, -5.03710E+02, 1.04092E+00, -4.12746E-03, 1.82790E-01,
+    20, -3.59763E+01, -4.08458E-01, 1.67264E-03, -9.98919E-02,
+    21, -6.09432E+02, 1.50436E+00, -9.09024E-04, -1.35495E-01,
+    22, -1.02367E+02, -7.16253E-01, -2.90670E-04, -5.88556E-01,
+    23, -1.93802E+03, 6.48262E-01, 1.73245E-03, 3.47940E-02,
+    24, -5.26919E+00, 3.06435E-01, 3.25397E-03, -6.81506E-01,
+    25, -2.84042E+02, -6.25424E-01, -8.22474E-04, -8.80549E-02,
+    26, 1.50093E+02, 2.39875E-02, -3.37969E-03, 1.52789E-02,
+    27, -2.03387E+01, -5.48718E+00, 8.39075E-03, 1.07884E-01,
+    28, -8.38064E+02, -1.09600E+00, -4.24385E-04, 2.81812E-01,
+    29, -5.27934E+01, -4.63689E-01, -5.11647E-03, 3.84965E-01,
+    30, -1.61520E+03, 9.01669E-01, 1.44536E-03, 2.66889E-01
+  )
+  
+  b_k_df <- 
+    dplyr::mutate(table_5, b_k = B1/temp_k + B2 + B3*temp_k + B4*log(temp_k))
+  
+  out <- b_k_df$b_k
+  names(out) <- paste0("b", b_k_df$k)
+  #return:
+  out
 }
\ No newline at end of file

From 09d2597f728e931c003ac3292264a3933ed3b427 Mon Sep 17 00:00:00 2001
From: Eric Scott <scottericr@gmail.com>
Date: Fri, 9 Aug 2024 14:08:19 -0700
Subject: [PATCH 2/8] finish updating simpol1 to allow for specification of
 temperature

---
 DESCRIPTION    |   2 +-
 R/simpol1.R    | 116 +++++++++++++++++++++++++++----------------------
 man/simpol1.Rd |  29 +++++++++----
 3 files changed, 86 insertions(+), 61 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 763a016..865a98d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -46,4 +46,4 @@ biocViews:
 Config/testthat/edition: 3
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
diff --git a/R/simpol1.R b/R/simpol1.R
index f09f0a2..71b9970 100644
--- a/R/simpol1.R
+++ b/R/simpol1.R
@@ -12,24 +12,29 @@
 #' atmospheres.
 #' 
 #' The modified method in Meredith et al. (2023) adds the following additional
-#' functional groups and coefficients:
+#' functional groups:
 #' 
-#' - Phosphoric acid (-2.23)
-#' - Phosphoric ester (-2.23)
-#' - Sulfate (-2.23)
-#' - Sulfonate (-2.23)
-#' - Thiol (-2.23)
-#' - Carbothioester (-1.20)
+#' Using the same coefficient as nitrate
 #' 
-#' @note The method described in Pankow & Asher (2008) allows for
-#' calculations of logP at different temperatures.  This implementation
-#' currently only calculates values at 20ºC.
+#' - Phosphoric acid
+#' - Phosphoric ester
+#' - Sulfate
+#' - Sulfonate
+#' - Thiol
 #' 
+#' Using the same coefficient as ester
+#' - Carbothioester
+#' 
+#' @note The method described in Pankow & Asher (2008) was developed using data
+#'   between 0ºC and 120ºC, so extrapolating beyond that range is not
+#'   recommended.
+#'
 #' @param fx_groups A data.frame or tibble with counts of functional groups
 #'   produced by [get_fx_groups()] (or manually, with the same column names).
 #' @param meredith Logical; `FALSE`: use the original SIMPOL.1 method. `TRUE`:
 #'   use the modified version in Meredith et al. (2023).
-#'
+#' @param temp_c Numeric; the temperature at which to calculate volatility
+#'   estimates.
 #' @returns The `fx_groups` tibble with the additional `log10_P` column.
 #'
 #' @references 
@@ -50,7 +55,13 @@
 #' sdf <- ChemmineR::read.SDFset(mol_path)
 #' fx_groups <- get_fx_groups(sdf)
 #' simpol1(fx_groups)
-simpol1 <- function(fx_groups, temp_k, meredith = TRUE) {
+simpol1 <- function(fx_groups, meredith = TRUE, temp_c = 20) {
+  #convert ºC to K
+  if (temp_c < 0 | temp_c > 120) {
+    warning("Temperatures below 0ºC or above 120ºC extrapolate beyond the range SIMPOL.1 was intended for. \n  Interpret results with caution!")
+  }
+  temp_k <- temp_c + 273.15
+  
   b_k <- calc_bk_simpol1(temp_k)
   
   contributions <- fx_groups %>%
@@ -63,58 +74,59 @@ simpol1 <- function(fx_groups, temp_k, meredith = TRUE) {
     dplyr::mutate(
       # multiplier for each functional group is volatility contribution to log10P
       # b_k(T)  * v_k,i
-      b_00 = (b_k["b0"]  * 1), #b_0(T) is an intercept/constant.  
-      b_01 = (b_k["b1"]  * .data$carbons),
-      b_02 = (b_k["b2"]  * .data$carbons_asa),
-      b_03 = (b_k["b3"]	 * .data$rings_aromatic),
-      b_04 = (b_k["b4"]  * .data$rings_aliphatic),
-      b_05 = (b_k["b5"]  * .data$carbon_dbl_bonds_aliphatic),
-      b_06 = (b_k["b6"]  * .data$CCCO_aliphatic_ring),
-      b_07 = (b_k["b7"]  * .data$hydroxyl_aliphatic),
-      b_08 = (b_k["b8"]  * .data$aldehydes),
-      b_09 = (b_k["b9"]  * .data$ketones),
-      b_10 = (b_k["b10"]  * .data$carbox_acids),
-      b_11 = (b_k["b11"]  * .data$ester),
-      b_12 = (b_k["b12"]  * .data$ether_alkyl),
-      b_13 = (b_k["b13"]  * .data$ether_alicyclic),
-      b_14 = (b_k["b14"]  * .data$ether_aromatic),
-      b_15 = (b_k["b15"]  * .data$nitrate),
-      b_16 = (b_k["b16"]  * .data$nitro),
-      b_17 = (b_k["b17"]  * .data$hydroxyl_aromatic),
-      b_18 = (b_k["b18"]  * .data$amine_primary),
-      b_19 = (b_k["b19"]	 * .data$amine_secondary),
-      b_20 = (b_k["b20"]	 * .data$amine_tertiary),
-      b_21 = (b_k["b21"]  * .data$amine_aromatic),
-      b_22 = (b_k["b22"]  * .data$amide_primary),
-      b_23 = (b_k["b23"]  * .data$amide_secondary),
-      b_24 = (b_k["b24"]  * .data$amide_tertiary),
-      b_25 = (b_k["b25"]  * .data$carbonylperoxynitrate),
-      b_26 = (b_k["b26"]  * .data$peroxide),
-      b_27 = (b_k["b27"]  * .data$hydroperoxide),
-      b_28 = (b_k["b28"]  * .data$carbonylperoxyacid),
-      b_29 = (b_k["b29"]	 * .data$nitrophenol),
-      b_30 = (b_k["b30"]  * .data$nitroester)
+      vb_00 = (b_k["b0"]  * 1), #b_0(T) is an intercept/constant.  
+      vb_01 = (b_k["b1"]  * .data$carbons),
+      vb_02 = (b_k["b2"]  * .data$carbons_asa),
+      vb_03 = (b_k["b3"]	 * .data$rings_aromatic),
+      vb_04 = (b_k["b4"]  * .data$rings_aliphatic),
+      vb_05 = (b_k["b5"]  * .data$carbon_dbl_bonds_aliphatic),
+      vb_06 = (b_k["b6"]  * .data$CCCO_aliphatic_ring),
+      vb_07 = (b_k["b7"]  * .data$hydroxyl_aliphatic),
+      vb_08 = (b_k["b8"]  * .data$aldehydes),
+      vb_09 = (b_k["b9"]  * .data$ketones),
+      vb_10 = (b_k["b10"]  * .data$carbox_acids),
+      vb_11 = (b_k["b11"]  * .data$ester),
+      vb_12 = (b_k["b12"]  * .data$ether_alkyl),
+      vb_13 = (b_k["b13"]  * .data$ether_alicyclic),
+      vb_14 = (b_k["b14"]  * .data$ether_aromatic),
+      vb_15 = (b_k["b15"]  * .data$nitrate),
+      vb_16 = (b_k["b16"]  * .data$nitro),
+      vb_17 = (b_k["b17"]  * .data$hydroxyl_aromatic),
+      vb_18 = (b_k["b18"]  * .data$amine_primary),
+      vb_19 = (b_k["b19"]	 * .data$amine_secondary),
+      vb_20 = (b_k["b20"]	 * .data$amine_tertiary),
+      vb_21 = (b_k["b21"]  * .data$amine_aromatic),
+      vb_22 = (b_k["b22"]  * .data$amide_primary),
+      vb_23 = (b_k["b23"]  * .data$amide_secondary),
+      vb_24 = (b_k["b24"]  * .data$amide_tertiary),
+      vb_25 = (b_k["b25"]  * .data$carbonylperoxynitrate),
+      vb_26 = (b_k["b26"]  * .data$peroxide),
+      vb_27 = (b_k["b27"]  * .data$hydroperoxide),
+      vb_28 = (b_k["b28"]  * .data$carbonylperoxyacid),
+      vb_29 = (b_k["b29"]	 * .data$nitrophenol),
+      vb_30 = (b_k["b30"]  * .data$nitroester)
     ) 
   
   if (isTRUE(meredith)) {
     contributions <- contributions %>% 
-      #TODO: either scale these with temp_k or don't allow use with temp_k other than 293.15
       dplyr::mutate(
         # # Below are additions from Meredith et al.
-        b_32 = (-2.23	  * .data$phosphoric_acids),
-        b_33 = (-2.23	  * .data$phosphoric_esters),
-        b_34 = (-2.23	  * .data$sulfates),
-        b_35 = (-2.23	  * .data$sulfonates),
-        b_36 = (-2.23	  * .data$thiols),
-        b_37 = (-1.20	  * .data$carbothioesters)
+        # use the coef for nitrate for the following groups
+        vb_32 = (b_k["b15"] * .data$phosphoric_acids),
+        vb_33 = (b_k["b15"] * .data$phosphoric_esters),
+        vb_34 = (b_k["b15"] * .data$sulfates),
+        vb_35 = (b_k["b15"] * .data$sulfonates),
+        vb_36 = (b_k["b15"] * .data$thiols),
+        # use the coef for ester for carbothioesters
+        vb_37 = (b_k["b11"] * .data$carbothioesters)
       )
   }
   
   contributions %>% 
     dplyr::rowwise() %>% 
-    dplyr::mutate(log10_P = sum(dplyr::c_across(dplyr::starts_with("b_")))) %>% 
+    dplyr::mutate(log10_P = sum(dplyr::c_across(dplyr::starts_with("vb_")))) %>% 
     dplyr::ungroup() %>% 
-    dplyr::select(-dplyr::starts_with("b_"))
+    dplyr::select(-dplyr::starts_with("vb_"))
 }
 
 
diff --git a/man/simpol1.Rd b/man/simpol1.Rd
index 1e0f461..c028cff 100644
--- a/man/simpol1.Rd
+++ b/man/simpol1.Rd
@@ -4,7 +4,7 @@
 \alias{simpol1}
 \title{SIMPOL.1 method for calculating estimated volatility}
 \usage{
-simpol1(fx_groups, meredith = TRUE)
+simpol1(fx_groups, meredith = TRUE, temp_c = 20)
 }
 \arguments{
 \item{fx_groups}{A data.frame or tibble with counts of functional groups
@@ -12,6 +12,13 @@ produced by \code{\link[=get_fx_groups]{get_fx_groups()}} (or manually, with the
 
 \item{meredith}{Logical; \code{FALSE}: use the original SIMPOL.1 method. \code{TRUE}:
 use the modified version in Meredith et al. (2023).}
+
+\item{temp_c}{Numeric; the temperature at which to calculate volatility
+estimates. Pankow & Asher (2008) only used values between 0 and 120ºC so a
+warning is printed if you supply a value outside of this range. Also note
+that Pankow & Asher (2008) reported decreases in the accuracy of the
+SIMPOL.1 method with temperatures at the lower end of this range (see Fig.
+10 in their paper).}
 }
 \value{
 The \code{fx_groups} tibble with the additional \code{log10_P} column.
@@ -30,14 +37,20 @@ coefficients for each functional group (\eqn{b_K(T)}). Units are in log10
 atmospheres.
 
 The modified method in Meredith et al. (2023) adds the following additional
-functional groups and coefficients:
+functional groups:
+
+Using the same coefficient as nitrate
+\itemize{
+\item Phosphoric acid
+\item Phosphoric ester
+\item Sulfate
+\item Sulfonate
+\item Thiol
+}
+
+Using the same coefficient as ester
 \itemize{
-\item Phosphoric acid (-2.23)
-\item Phosphoric ester (-2.23)
-\item Sulfate (-2.23)
-\item Sulfonate (-2.23)
-\item Thiol (-2.23)
-\item Carbothioester (-1.20)
+\item Carbothioester
 }
 }
 \note{

From c5aa4524bab4e658013371c4495521f6605209d9 Mon Sep 17 00:00:00 2001
From: Eric Scott <scottericr@gmail.com>
Date: Fri, 9 Aug 2024 15:30:57 -0700
Subject: [PATCH 3/8] increased precision changed results slightly

---
 tests/testthat/test-calc_vol.R | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/tests/testthat/test-calc_vol.R b/tests/testthat/test-calc_vol.R
index 7d41806..7d1cb53 100644
--- a/tests/testthat/test-calc_vol.R
+++ b/tests/testthat/test-calc_vol.R
@@ -1,6 +1,6 @@
 test_that("volatility estimate is correct", {
-  ex_vol_df <- calc_vol("data/C16181.mol")
-  expect_equal(round(ex_vol_df$rvi, 6), 6.975349)
+  ex_vol_df <- calc_vol(testthat::test_path("data/C16181.mol"), temp_c = 20)
+  expect_equal(round(ex_vol_df$rvi, 4), 6.9776)
 })
 
 test_that("returns correct columns depending on return arguments", {
@@ -28,11 +28,15 @@ test_that("calc_vol() works with multiple inputs", {
 
 test_that("smiles and .mol give same results", {
   paths <-
-    c("data/C16181.mol",
-      "data/map00361/C00011.mol",
-      "data/map00361/C00042.mol")
+    c(testthat::test_path("data/C16181.mol"),
+      testthat::test_path("data/map00361/C00011.mol"),
+      testthat::test_path("data/map00361/C00042.mol"))
   smiles <-
-    c("C1(C(C(C(C(C1Cl)Cl)Cl)Cl)Cl)O", "O=C=O", "C(CC(=O)O)C(=O)O")
+    c(
+      "beta-2,3,4,5,6-Pentachlorocyclohexanol" = "C1(C(C(C(C(C1Cl)Cl)Cl)Cl)Cl)O",
+      "CO2" = "O=C=O",
+      "Succinate" = "C(CC(=O)O)C(=O)O"
+    )
   expect_equal(
     calc_vol(smiles, from = "smiles") %>% dplyr::select(-name,-smiles),
     calc_vol(paths) %>% dplyr::select(-name,-mol_path)

From cb72d88c6923427830d6bf4b78915c53a35acdbe Mon Sep 17 00:00:00 2001
From: Eric Scott <scottericr@gmail.com>
Date: Fri, 9 Aug 2024 15:31:27 -0700
Subject: [PATCH 4/8] add temperature arg and fix the way named vectors of
 smiles works

---
 NEWS.md         |  4 +++-
 R/calc_vol.R    |  8 ++++++--
 R/simpol1.R     |  2 +-
 man/calc_vol.Rd |  4 ++++
 man/simpol1.Rd  | 12 ++++--------
 5 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index d5fce7b..f0d15ce 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,8 @@
 # volcalc (development version)
 
-* adds a `validate = TRUE` option to `calc_vol()` and `get_fx_groups()` that returns `NA`s when there are suspected errors in parsing SMILES or .mol files. This is unfortunately not available on Windows due to differences in the windows version of `ChemmineOB`
+* Added a `validate = TRUE` option to `calc_vol()` and `get_fx_groups()` that returns `NA`s when there are suspected errors in parsing SMILES or .mol files. This is unfortunately not available on Windows due to differences in the windows version of `ChemmineOB`
+* Users can now supply a temperature at which to calculate volatility estimates.
+* Inputs to `calc_vol()` supplied as a named vector of SMILES strings now properly pass the names to `ChemmineR::smile2sdf()` where they are used as the molecule name.
 
 # volcalc 2.1.2
 
diff --git a/R/calc_vol.R b/R/calc_vol.R
index 5778cc6..50beca3 100644
--- a/R/calc_vol.R
+++ b/R/calc_vol.R
@@ -7,6 +7,8 @@
 #' @param from The form of `input`. Either `"mol_path"` (default) or `"smiles"`.
 #' @param method The method for calculating estimated volatility. See
 #'   [simpol1()] for more details.
+#' @param temp_c numeric; For methods that allow it, specify temperature (in
+#'   degrees C) at which log10(P) estimates are calculated.
 #' @param environment The environment for calculating relative volatility
 #'   categories. RVI thresholds for low, moderate, and high volatility are as
 #'   follows: `"clean"` (clean atmosphere, default) -2, 0, 2; `"polluted"`
@@ -59,6 +61,7 @@ calc_vol <-
   function(input, 
            from = c("mol_path", "smiles"),
            method = c("meredith", "simpol1"),
+           temp_c = 20,
            environment = c("clean", "polluted", "soil"),
            validate = TRUE,
            return_fx_groups = FALSE,
@@ -88,6 +91,8 @@ calc_vol <-
     }
     
     if(from == "smiles") {
+      # a way of converting into a list of *named* vectors, so smiles2sdf() runs on named vectors as input
+      input <- split(input, seq_along(input))
       compound_sdf_list <- lapply(input, ChemmineR::smiles2sdf)
     }
     fx_groups_df_list <-
@@ -98,7 +103,7 @@ calc_vol <-
       dplyr::bind_rows(fx_groups_df_list, .id = {{ from }}) 
     
     # calculate relative volatility & categories from logP
-    vol_df <- simpol1(fx_groups_df, meredith = meredith) %>% 
+    vol_df <- simpol1(fx_groups_df, meredith = meredith, temp_c = temp_c) %>% 
       dplyr::mutate(
         # mass is converted from grams to micrograms
         # 0.0000821 is universal gas constant
@@ -121,7 +126,6 @@ calc_vol <-
         colnames(fx_groups_df)[!colnames(fx_groups_df) %in% c("formula", "name", "molecular_weight")]
     }
     if (isTRUE(return_calc_steps)) {
-      #TODO document log_alpha (need to figure out why it's called that first)
       cols_calc <- c("molecular_weight", "log_alpha", "log10_P")
     }
     
diff --git a/R/simpol1.R b/R/simpol1.R
index 71b9970..4814b80 100644
--- a/R/simpol1.R
+++ b/R/simpol1.R
@@ -34,7 +34,7 @@
 #' @param meredith Logical; `FALSE`: use the original SIMPOL.1 method. `TRUE`:
 #'   use the modified version in Meredith et al. (2023).
 #' @param temp_c Numeric; the temperature at which to calculate volatility
-#'   estimates.
+#'   estimates in degrees C.
 #' @returns The `fx_groups` tibble with the additional `log10_P` column.
 #'
 #' @references 
diff --git a/man/calc_vol.Rd b/man/calc_vol.Rd
index 5e775db..5ac425a 100644
--- a/man/calc_vol.Rd
+++ b/man/calc_vol.Rd
@@ -8,6 +8,7 @@ calc_vol(
   input,
   from = c("mol_path", "smiles"),
   method = c("meredith", "simpol1"),
+  temp_c = 20,
   environment = c("clean", "polluted", "soil"),
   validate = TRUE,
   return_fx_groups = FALSE,
@@ -22,6 +23,9 @@ calc_vol(
 \item{method}{The method for calculating estimated volatility. See
 \code{\link[=simpol1]{simpol1()}} for more details.}
 
+\item{temp_c}{numeric; For methods that allow it, specify temperature (in
+degrees C) at which log10(P) estimates are calculated.}
+
 \item{environment}{The environment for calculating relative volatility
 categories. RVI thresholds for low, moderate, and high volatility are as
 follows: \code{"clean"} (clean atmosphere, default) -2, 0, 2; \code{"polluted"}
diff --git a/man/simpol1.Rd b/man/simpol1.Rd
index c028cff..c6c4693 100644
--- a/man/simpol1.Rd
+++ b/man/simpol1.Rd
@@ -14,11 +14,7 @@ produced by \code{\link[=get_fx_groups]{get_fx_groups()}} (or manually, with the
 use the modified version in Meredith et al. (2023).}
 
 \item{temp_c}{Numeric; the temperature at which to calculate volatility
-estimates. Pankow & Asher (2008) only used values between 0 and 120ºC so a
-warning is printed if you supply a value outside of this range. Also note
-that Pankow & Asher (2008) reported decreases in the accuracy of the
-SIMPOL.1 method with temperatures at the lower end of this range (see Fig.
-10 in their paper).}
+estimates in degrees C.}
 }
 \value{
 The \code{fx_groups} tibble with the additional \code{log10_P} column.
@@ -54,9 +50,9 @@ Using the same coefficient as ester
 }
 }
 \note{
-The method described in Pankow & Asher (2008) allows for
-calculations of logP at different temperatures.  This implementation
-currently only calculates values at 20ºC.
+The method described in Pankow & Asher (2008) was developed using data
+between 0ºC and 120ºC, so extrapolating beyond that range is not
+recommended.
 }
 \examples{
 mol_path <- mol_example()[3]

From 050cedd154074d6c7e1322097e69944ad4c541f2 Mon Sep 17 00:00:00 2001
From: Eric Scott <scottericr@gmail.com>
Date: Fri, 9 Aug 2024 15:51:20 -0700
Subject: [PATCH 5/8] satisfy check() demands

---
 R/simpol1.R    | 10 +++++-----
 man/simpol1.Rd |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/R/simpol1.R b/R/simpol1.R
index 4814b80..c546454 100644
--- a/R/simpol1.R
+++ b/R/simpol1.R
@@ -26,7 +26,7 @@
 #' - Carbothioester
 #' 
 #' @note The method described in Pankow & Asher (2008) was developed using data
-#'   between 0ºC and 120ºC, so extrapolating beyond that range is not
+#'   between 0 °C and 120 °C, so extrapolating beyond that range is not
 #'   recommended.
 #'
 #' @param fx_groups A data.frame or tibble with counts of functional groups
@@ -34,7 +34,7 @@
 #' @param meredith Logical; `FALSE`: use the original SIMPOL.1 method. `TRUE`:
 #'   use the modified version in Meredith et al. (2023).
 #' @param temp_c Numeric; the temperature at which to calculate volatility
-#'   estimates in degrees C.
+#'   estimates in °C.
 #' @returns The `fx_groups` tibble with the additional `log10_P` column.
 #'
 #' @references 
@@ -56,9 +56,9 @@
 #' fx_groups <- get_fx_groups(sdf)
 #' simpol1(fx_groups)
 simpol1 <- function(fx_groups, meredith = TRUE, temp_c = 20) {
-  #convert ºC to K
+  #convert C to K
   if (temp_c < 0 | temp_c > 120) {
-    warning("Temperatures below 0ºC or above 120ºC extrapolate beyond the range SIMPOL.1 was intended for. \n  Interpret results with caution!")
+    warning("Temperatures below 0\u00b0C or above 120\u00b0C extrapolate beyond the range SIMPOL.1 was intended for. \n  Interpret results with caution!")
   }
   temp_k <- temp_c + 273.15
   
@@ -168,7 +168,7 @@ calc_bk_simpol1 <- function(temp_k = 293.15) {
   )
   
   b_k_df <- 
-    dplyr::mutate(table_5, b_k = B1/temp_k + B2 + B3*temp_k + B4*log(temp_k))
+    dplyr::mutate(table_5, b_k = .data$B1/temp_k + .data$B2 + .data$B3*temp_k + .data$B4*log(temp_k))
   
   out <- b_k_df$b_k
   names(out) <- paste0("b", b_k_df$k)
diff --git a/man/simpol1.Rd b/man/simpol1.Rd
index c6c4693..b85897b 100644
--- a/man/simpol1.Rd
+++ b/man/simpol1.Rd
@@ -14,7 +14,7 @@ produced by \code{\link[=get_fx_groups]{get_fx_groups()}} (or manually, with the
 use the modified version in Meredith et al. (2023).}
 
 \item{temp_c}{Numeric; the temperature at which to calculate volatility
-estimates in degrees C.}
+estimates in °C.}
 }
 \value{
 The \code{fx_groups} tibble with the additional \code{log10_P} column.
@@ -51,7 +51,7 @@ Using the same coefficient as ester
 }
 \note{
 The method described in Pankow & Asher (2008) was developed using data
-between 0ºC and 120ºC, so extrapolating beyond that range is not
+between 0 °C and 120 °C, so extrapolating beyond that range is not
 recommended.
 }
 \examples{

From ed334fc878b35aea3799a1594400bbcaeb05f348 Mon Sep 17 00:00:00 2001
From: Eric Scott <scottericr@gmail.com>
Date: Thu, 15 Aug 2024 16:25:00 -0700
Subject: [PATCH 6/8] add simple test for temp_c arg

---
 tests/testthat/test-simpol1.R | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/testthat/test-simpol1.R b/tests/testthat/test-simpol1.R
index 86ba1e1..ec6f1ec 100644
--- a/tests/testthat/test-simpol1.R
+++ b/tests/testthat/test-simpol1.R
@@ -15,3 +15,11 @@ test_that("isoprene is more volatile than glucose", {
   expect_gt(log10P_together[1], log10P_together[2])
   expect_equal(log10P_together, log10P_separate)
 })
+
+test_that("changing temperature does something", {
+  isoprene <- ChemmineR::read.SDFset(mol_example()[5])
+  groups <- get_fx_groups(isoprene)
+  c_30 <- simpol1(groups, temp_c = 30)$log10_P
+  c_20 <- simpol1(groups, temp_c = 20)$log10_P
+  expect_gt(c_30, c_20)
+})

From 99fe477be15deca3ad5116984e41884e952fca30 Mon Sep 17 00:00:00 2001
From: Eric Scott <scottericr@gmail.com>
Date: Mon, 9 Dec 2024 11:43:09 -0700
Subject: [PATCH 7/8] alert users about slight change in numbers

---
 NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index f0d15ce..2d7bf51 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,7 +1,7 @@
 # volcalc (development version)
 
 * Added a `validate = TRUE` option to `calc_vol()` and `get_fx_groups()` that returns `NA`s when there are suspected errors in parsing SMILES or .mol files. This is unfortunately not available on Windows due to differences in the windows version of `ChemmineOB`
-* Users can now supply a temperature at which to calculate volatility estimates.
+* Users can now supply a temperature at which to calculate volatility estimates. As a result, RVI values may now be *slightly* different compared to those from `volcalc` v2.1.2 due to rounding differences.
 * Inputs to `calc_vol()` supplied as a named vector of SMILES strings now properly pass the names to `ChemmineR::smile2sdf()` where they are used as the molecule name.
 
 # volcalc 2.1.2

From e101e9274259edfb22219ca2582b748dd5e2831a Mon Sep 17 00:00:00 2001
From: Eric Scott <scottericr@gmail.com>
Date: Mon, 9 Dec 2024 11:44:47 -0700
Subject: [PATCH 8/8] indentation

---
 R/simpol1.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/simpol1.R b/R/simpol1.R
index c546454..16272f5 100644
--- a/R/simpol1.R
+++ b/R/simpol1.R
@@ -77,7 +77,7 @@ simpol1 <- function(fx_groups, meredith = TRUE, temp_c = 20) {
       vb_00 = (b_k["b0"]  * 1), #b_0(T) is an intercept/constant.  
       vb_01 = (b_k["b1"]  * .data$carbons),
       vb_02 = (b_k["b2"]  * .data$carbons_asa),
-      vb_03 = (b_k["b3"]	 * .data$rings_aromatic),
+      vb_03 = (b_k["b3"]	* .data$rings_aromatic),
       vb_04 = (b_k["b4"]  * .data$rings_aliphatic),
       vb_05 = (b_k["b5"]  * .data$carbon_dbl_bonds_aliphatic),
       vb_06 = (b_k["b6"]  * .data$CCCO_aliphatic_ring),