Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 167 additions & 2 deletions src/pychemelt/monomer.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
re_arrange_params,
re_arrange_predictions,
subset_data,
transform_to_list
transform_to_list,
ci_dict_to_summary_df
)

from .utils.fitting import (
Expand All @@ -52,6 +53,7 @@ def __init__(self, name='Test'):
self.thermodynamic_params_guess = None
self.nr_den = 0 # Number of denaturant concentrations
self.oligomeric = False # Flag for oligomer for plotting
self.model_scale_factor = False # Flag for model scale factor for fitting

def set_denaturant_concentrations(self, concentrations=None):

Expand Down Expand Up @@ -366,7 +368,6 @@ def fit_thermal_unfolding_global(
List of two values, the lower and upper bounds for the Tm value. If None, bounds set automatically
cp_value : float, optional
If provided, the Cp value is fixed to this value, the bounds are ignored

Notes
-----
This is a heavy routine that creates/updates many fitting-related attributes, including:
Expand All @@ -376,6 +377,9 @@ def fit_thermal_unfolding_global(
- flags: global_fit_done, fit_m_dep, limited_tm, limited_dh, limited_cp, fixed_cp
"""

self.global_global_global_fit_done = False # Reset the flag for the more complex global fit
self.global_global_fit_done = False # Reset the flag for the global fit with shared slopes

# Requires Cp0
if self.Cp0 <= 0:
raise ValueError('Cp0 must be positive. Please run guess_Cp before fitting globally.')
Expand Down Expand Up @@ -629,6 +633,10 @@ def fit_thermal_unfolding_global(
self.create_params_df()
self.create_dg_df()

# Add the kwargs and fit_fx to the object for potential later use in leave-one-out analysis
self.kwargs_fit = kwargs
self.fit_fx = fit_fx

return None

def fit_thermal_unfolding_global_global(self):
Expand All @@ -644,6 +652,8 @@ def fit_thermal_unfolding_global_global(self):
Updates global fitting attributes and sets `global_global_fit_done` when complete.
"""

self.global_global_global_fit_done = False # Reset the flag for the more complex global fit

# Requires global fit done
if not self.global_fit_done:
self.fit_thermal_unfolding_global()
Expand Down Expand Up @@ -832,6 +842,9 @@ def fit_thermal_unfolding_global_global(self):

self.global_global_fit_done = True

self.kwargs_fit = kwargs
self.fit_fx = fit_fx

return None

def fit_thermal_unfolding_global_global_global(
Expand Down Expand Up @@ -1157,6 +1170,8 @@ def fit_thermal_unfolding_global_global_global(
self.create_dg_df()

self.global_global_global_fit_done = True

self.model_scale_factor = model_scale_factor

# Obtained the scaled signal too
if model_scale_factor:
Expand All @@ -1178,6 +1193,156 @@ def fit_thermal_unfolding_global_global_global(
self.signal_lst_multiple_scaled = signal_scaled
self.predicted_lst_multiple_scaled = predicted_scaled

# Store the kwargs and fit_fx for potential later use in leave-one-out analysis
self.kwargs_fit = kwargs
self.fit_fx = fit_fx

return None

def leave_one_out_cross_validation(self):

"""
Perform a leave-one-out cross-validation by fitting the model multiple times, each time leaving out one of the datasets (signal-temperature pairs).
"""

kwargs = deepcopy(self.kwargs_fit) # We need a deep copy to avoid problems with modifying the original kwargs in place across iterations
fit_fx = self.fit_fx

kwargs['initial_parameters'] = self.global_fit_params
kwargs['low_bounds'] = self.low_bounds
kwargs['high_bounds'] = self.high_bounds

Tms = []
DHs = []
Cps = []
m0s = []

n = len(self.signal_lst_expanded)

for i in range(n):

kwargs['list_of_temperatures'] = self.temp_lst_expanded[:i] + self.temp_lst_expanded[i+1:]
kwargs['list_of_signals'] = self.signal_lst_expanded[:i] + self.signal_lst_expanded[i+1:]
kwargs['denaturant_concentrations'] = np.delete(self.denaturant_concentrations_expanded, i)

# Check if we have a global global global fit with scale factor, and in that case we need to adjust the scale_factor_exclude_ids
if self.model_scale_factor and self.global_global_global_fit_done:

# We need to create a new list to allow modifications. Read the original list from the kwargs to avoid modifying it in place across iterations
scale_factor_exclude_ids = [x for x in self.kwargs_fit.get('scale_factor_exclude_ids')]

for j, sf in enumerate(scale_factor_exclude_ids):

# If the scale factor ID is the same as i, we need to remove it
if sf == i:
scale_factor_exclude_ids.pop(j)
break

# If the scale factor ID is greater than i, we need to decrease it by 1, because we are removing one dataset before it
elif sf > i:
scale_factor_exclude_ids[j] -= 1

# If the scale factor ID is less than i, we don't need to do anything
####

kwargs['scale_factor_exclude_ids'] = scale_factor_exclude_ids

global_fit_params, _, _, _, _ = fit_fx(**kwargs)

Tm = global_fit_params[0]
low_bound_Tm = self.low_bounds[0] # Make sure to compare in Kelvin units
high_bound_Tm = self.high_bounds[0] # Make sure to compare in Kelvin units

tm_is_acceptable = Tm >= low_bound_Tm + 0.5 and Tm <= high_bound_Tm - 0.5

DH = global_fit_params[1]

DH_is_acceptable = DH >= self.low_bounds[1] + 5 and DH <= self.high_bounds[1] - 5

if self.cp_value is None:

Cp = global_fit_params[2]

Cp_is_acceptable = Cp >= self.low_bounds[2] + 0.1 and Cp <= self.high_bounds[2] - 0.1

id = 2 + (self.cp_value is None)
m0 = global_fit_params[id]

m0_is_acceptable = m0 >= self.low_bounds[id] + 0.1 and m0 <= self.high_bounds[id] - 0.1

keep_fit = tm_is_acceptable and DH_is_acceptable and m0_is_acceptable and (Cp_is_acceptable if self.cp_value is None else True)

if not keep_fit:
n -= 1
continue

Tms.append(Tm)
DHs.append(DH)

id = 2
if self.cp_value is None:
Cps.append(Cp)
id += 1

m0s.append(m0)

# Create a DataFrame to store the results, with the mean and standard deviation of the parameters across the leave-one-out fits

params = ['Tm','DH']

if self.cp_value is None:
params.append('Cp')

params.append('m0')

loo_values = [np.mean(Tms), np.mean(DHs)]
if self.cp_value is None:
loo_values.append(np.mean(Cps))
loo_values.append(np.mean(m0s))

loo_std = [np.std(Tms), np.std(DHs)]
if self.cp_value is None:
loo_std.append(np.std(Cps))
loo_std.append(np.std(m0s))

if n == 0:
self.loo_df = pd.DataFrame({
'Parameter': params,
'LOO_Mean': [np.nan for _ in params],
'LOO_Std': [np.nan for _ in params],
'N fits': [0 for _ in params]
})
raise ValueError("All leave-one-out fits were rejected based on (thermodynamic) parameter bounds. "
"In other words, the fitted parameters (Tm, DH, Cp, m-value) were too close to the specified bounds in all fits."
"Please check the bounds, model, and the data quality.")

self.loo_df = pd.DataFrame({
'Parameter': params,
'Leave_one_out_mean': loo_values,
'Leave_one_out_std': loo_std,
'N fits': n
})

return None

def calculate_confidence_intervals(self, percentage=0.95):

# Find the right amount of param names
desired_params = ['Tm', 'DH', 'Cp', 'm0']

param_names = [x for x in list(self.result.params.keys()) if any(param in x for param in desired_params)]

ci_results = compute_asymmetric_confidence_intervals(
minimizer = self.minimizer,
result = self.result,
param_names = param_names,
sigmas = [percentage]
# If any of the sigma values is less than 1, that will be interpreted as a probability
# https://lmfit.github.io/lmfit-py/confidence.html
)

self.ci_df = ci_dict_to_summary_df(ci_results,percentage=percentage)

return None

def signal_to_df(self, signal_type='raw', scaled=False):
Expand Down
9 changes: 8 additions & 1 deletion src/pychemelt/utils/fitting.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""
import lmfit
import numpy as np

from scipy.optimize import curve_fit
from scipy.optimize import least_squares

Expand Down Expand Up @@ -689,6 +690,8 @@ def residuals(pars):
# Convert Tm back to Celsius for the returned vector
if tm_value is None:
global_fit_params[0] = temperature_to_celsius(global_fit_params[0])
low_bounds[0] = temperature_to_celsius(low_bounds[0])
high_bounds[0] = temperature_to_celsius(high_bounds[0])

return global_fit_params, cov, predicted_lst, result, minimizer

Expand Down Expand Up @@ -1486,6 +1489,8 @@ def residuals(pars):
# Convert the Tm back to Celsius
if tm_value is None:
global_fit_params[0] = temperature_to_celsius(global_fit_params[0])
low_bounds[0] = temperature_to_celsius(low_bounds[0])
high_bounds[0] = temperature_to_celsius(high_bounds[0])

return global_fit_params, cov, predicted_lst, result, minimizer

Expand Down Expand Up @@ -2305,6 +2310,8 @@ def residuals(pars):

# Convert the Tm to Celsius
global_fit_params[0] = temperature_to_celsius(global_fit_params[0])
low_bounds[0] = temperature_to_celsius(low_bounds[0])
high_bounds[0] = temperature_to_celsius(high_bounds[0])

return global_fit_params, cov, predicted_lst, result, minimizer

Expand Down Expand Up @@ -3372,4 +3379,4 @@ def compute_asymmetric_confidence_intervals(minimizer, result, param_names=['Tm'
for sigma_val, value in ci[param]:
ci_results[param].append((sigma_val, value))

return ci_results
return ci_results
76 changes: 73 additions & 3 deletions src/pychemelt/utils/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,16 @@
"""
import re
import numpy as np
import pandas as pd
import itertools

from collections import Counter

from .math import shift_temperature, relative_errors
from .math import (
shift_temperature,
relative_errors,
temperature_to_celsius
)

from .fitting import (
fit_line_robust,
Expand Down Expand Up @@ -43,7 +48,8 @@
'parse_number',
'are_all_strings_numeric',
'is_float',
'transform_to_list'
'transform_to_list',
'ci_dict_to_summary_df'
]

def transform_to_list(element_or_list):
Expand Down Expand Up @@ -818,4 +824,68 @@ def is_float(element):
parse_number(element)
return True
except ValueError:
return False
return False


def ci_dict_to_summary_df(ci_dict,percentage=0.95):

"""
Convert lmfit confidence interval dictionary into a summary DataFrame.

Parameters
----------
ci_dict : dict
Dictionary containing confidence intervals for fitted parameters, typically in the format returned by lmfit.

Returns
-------
pd.DataFrame
DataFrame summarizing the confidence intervals for each parameter, with columns:
- Parameter: Name of the fitted parameter
- Lower_CI: Lower bound of the confidence interval
- Value: Best-fit value of the parameter
- Upper_CI: Upper bound of the confidence interval
"""

# If Tm is among the parameters, convert confidence intervals back to Celsius
# and change the parameter name in the results for clarity
if 'Tm' in ci_dict:
ci_dict['Tm (°C)'] = []
for sigma_val, value in ci_dict.pop('Tm'):
value_celsius = temperature_to_celsius(value)
ci_dict['Tm (°C)'].append((sigma_val, value_celsius))

# Replace DH for ΔH
if 'DHm' in ci_dict:
ci_dict['ΔH'] = []
for sigma_val, value in ci_dict.pop('DHm'):
ci_dict['ΔH'].append((sigma_val, value))

# Replace Cp0 for ΔCp
if 'Cp0' in ci_dict:
ci_dict['ΔCp'] = []
for sigma_val, value in ci_dict.pop('Cp0'):
ci_dict['ΔCp'].append((sigma_val, value))

# Replace m0 for m-value
if 'm0' in ci_dict:
ci_dict['m-value'] = []
for sigma_val, value in ci_dict.pop('m0'):
ci_dict['m-value'].append((sigma_val, value))

rows = []

for param, vals in ci_dict.items():

lower = round(float(vals[0][1]), 2)
best = round(float(vals[1][1]), 2)
upper = round(float(vals[2][1]), 2)

rows.append({
"Parameter": param,
f"Lower_CI_{percentage*100}%": lower,
"Value": best,
f"Upper_CI_{percentage*100}%": upper,
})

return pd.DataFrame(rows)
Loading
Loading