From ed9681887e247ba4c4e98a6ce39d623d394b7906 Mon Sep 17 00:00:00 2001 From: braniii Date: Fri, 3 Mar 2023 17:36:07 +0100 Subject: [PATCH 1/2] Revert "Remove draft of buchete-hummer projection." This reverts commit ab8a1b7fae2281f485bb27506c6993798361c8e2. --- src/msmhelper/msm/__init__.py | 4 + src/msmhelper/msm/tests.py | 170 ++++++++++++++++++++++++++++++++++ test/test_msm_tests.py | 27 ++++++ 3 files changed, 201 insertions(+) diff --git a/src/msmhelper/msm/__init__.py b/src/msmhelper/msm/__init__.py index 455a8af..0eae0ca 100644 --- a/src/msmhelper/msm/__init__.py +++ b/src/msmhelper/msm/__init__.py @@ -12,6 +12,8 @@ """ __all__ = [ + 'bh_test', + 'buchete_hummer_test', 'chapman_kolmogorov_test', 'ck_test', 'estimate_markov_model', @@ -27,6 +29,8 @@ ] from .tests import ( + bh_test, + buchete_hummer_test, chapman_kolmogorov_test, ck_test, ) diff --git a/src/msmhelper/msm/tests.py b/src/msmhelper/msm/tests.py index ee755f5..fbacd82 100644 --- a/src/msmhelper/msm/tests.py +++ b/src/msmhelper/msm/tests.py @@ -142,6 +142,176 @@ def _chapman_kolmogorov_test_md(trajs, tmin, tmax, steps=30): } +@decorit.alias('bh_test') +def buchete_hummer_test(trajs, lagtime, tmax): + r"""Calculate the Buchete Hummer test. + + This method estimates the Buchete Hummer autocorrelation test. Projecting + the state trajectory onto the right eigenvectors of the row normalized + transition matrix + + $$C_{lm} (t) = \langle \phi_l[s(\tau +t)] \phi_m[S(\tau)]\rangle$$ + + where \(\phi_i\) is the \(i\)-th right eigenvector. Buchete and Hummer[^2] + showed that for a Markovian system it obeys an exponentil decay, + corresponds to + + $$C_{lm} (t) = \delta_{lm} \exp(-t / t_k)$$ + + with the implied timescale \(t_k = - \tau_\text{lag} / \ln \lambda_k\). + + [^2]: Buchete and Hummer, **Coarse master equations for peptide folding + dynamics**, *J. Phys. Chem.*, 112, 6057-6069 (2008), + doi:[10.1021/jp0761665](https://doi.org/10.1021/jp0761665) + + Parameters + ---------- + trajs : StateTraj or list or ndarray or list of ndarray + State trajectory/trajectories. The states should start from zero and + need to be integers. + lagtime : int + Lagtimes for estimating the markov model given in [frames]. + tmax : int + Longest time to evaluate the CK equation given in [frames]. + + Returns + ------- + bhtest : dict + Dictionary holding for each lagtime the ckequation and with 'md' the + reference. + + """ + # format input + trajs = StateTraj(trajs) + + # check that lag times are array of integers + if not isinstance(lagtime, int) or lagtime < 0: + raise TypeError( + 'Lagtimes needs to be positive integers, but {0}.'.format(lagtime), + ) + + if not isinstance(tmax, int) or tmax < 0: + raise TypeError( + 'tmax needs to be a positive integer, but {0}'.format(tmax), + ) + + return _buchete_hummer_test(trajs, lagtime, tmax) + + +def _project_states_onto_vector(trajs, vector): + """Shift states to corresponding vector values. + + Parameters + ---------- + trajs : StateTraj or ndarray or list or list of ndarrays + 1D data or a list of data. + vector : ndarray or list + Values which will be used instead of states. + + Returns + ------- + array : ndarray + Shifted data in same shape as input. + + """ + trajs = StateTraj(trajs) + vector = np.atleast_1d(vector) + + # check data-type + if len(vector) != trajs.nstates: + raise TypeError( + 'Vector of wrong length. {0} '.format(len(vector)) + + 'vs. {0}'.format(trajs.nstates), + ) + + # flatten data + limits = np.cumsum([len(traj) for traj in trajs.trajs]) + array = trajs.trajs_flatten + + # convert to np.array + states = np.arange(trajs.nstates) + + # shift data + conv = np.arange(trajs.nstates, dtype=np.float64) + conv[states] = vector + array = conv[array] + + # reshape and return + return np.split(array, limits)[:-1] + + +def _buchete_hummer_test(trajs, lagtime, tmax): + r"""Calculate the Buchete Hummer equation.""" + times = _calc_times(lagtime=lagtime, tmax=tmax) + ntimes = len(times) + bheq = np.empty((trajs.nstates - 1, ntimes)) + + # estimate Markov model + tmat, _ = msm.estimate_markov_model(trajs, lagtime=lagtime) + + # symmetrize matrix + tmat = np.sqrt(tmat * tmat.T) + + # get orthogonal eigenvectors of symmetric matrix + evals, evecs = msm.utils.linalg.right_eigenvectors(tmat) + pi, *evecs = evecs # first eigenvector is equilibrium population + + # stationary distribution + pi = pi**2 / np.sum(pi**2) + + # normalize eigenvectors to be eigenvectors of the non symmetric transition + # matrix + evecs = np.array(evecs) / np.sqrt(pi[np.newaxis, :]) + + for idx, evec in enumerate(evecs): + trajs_proj = _project_states_onto_vector(trajs, evec) + + if not numba.config.DISABLE_JIT: # pragma: no cover + trajs_proj = numba.typed.List(trajs_proj) + + bheq[idx] = _autocorrelation(trajs_proj, times) + + # for idx_col, time in enumerate(times): + # # autocorrelation function + # c_nn, norm = 0, 0 + # for traj_proj in trajs_proj: + # c_nn += np.sum(traj_proj[: -time] * traj_proj[time:]) + # norm += len(traj_proj[time:]) +# + # bheq[idx, idx_col] = c_nn / norm + + # calculate reference (MD) + bheq_ref = np.empty((trajs.nstates - 1, ntimes)) + for idx, eigenval in enumerate(evals[1:]): + bheq_ref[idx] = np.exp(times * np.log(eigenval) / lagtime) + + return { + lagtime: {'bh': bheq, 'time': times}, + 'md': {'bh': bheq_ref, 'time': times}, + } + + +@numba.njit +def _autocorrelation(trajs, times): + """Generate a simple transition count matrix from multiple trajectories.""" + # initialize matrix + bheq = np.zeros(len(times), dtype=np.float64) + + for idx, time in enumerate(times): + # autocorrelation function + c_nn, norm = 0, 0 + for traj in trajs: + for val_from, val_to in zip( # noqa: WPS519 + traj[: -time], traj[time:], + ): + c_nn += val_from * val_to + norm += len(traj[time:]) + + bheq[idx] = c_nn / norm + + return bheq + + def _calc_times(lagtime, tmax): """Return time array.""" steps = int(np.floor(tmax / lagtime)) diff --git a/test/test_msm_tests.py b/test/test_msm_tests.py index aa4adbc..887e2ef 100644 --- a/test/test_msm_tests.py +++ b/test/test_msm_tests.py @@ -67,3 +67,30 @@ def test__chapman_kolmogorov_test(trajs, lagtime, tmax, result): ) else: np.testing.assert_array_almost_equal(cktest[key], result[key]) + + +@pytest.mark.parametrize('trajs, lagtime, tmax', [ + ([1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1], 2, 5), +]) +def test_buchete_hummer_test(trajs, lagtime, tmax): + """Test Buchete Hummer test.""" + # check float as lag times + with pytest.raises(TypeError): + _ = tests.bh_test(trajs, lagtime=[1.2], tmax=tmax) + + # check negative lag times + with pytest.raises(TypeError): + _ = tests.bh_test(trajs, lagtime=-1, tmax=tmax) + + # check maximal time negative + with pytest.raises(TypeError): + _ = tests.bh_test(trajs, lagtime=lagtime, tmax=-1) + + # check maximal time float + with pytest.raises(TypeError): + _ = tests.bh_test(trajs, lagtime=lagtime, tmax=5.7) + + # check if all keys exists + bhtest = tests.bh_test(trajs, lagtime=lagtime, tmax=tmax) + assert 'md' in bhtest + assert lagtime in bhtest From 40847e92e9f67660c54fd2bd1be259870a955c82 Mon Sep 17 00:00:00 2001 From: braniii Date: Fri, 3 Mar 2023 17:36:28 +0100 Subject: [PATCH 2/2] Revert "Remove unneeded imports of Buchete-Hummer as well." This reverts commit d669f3a678f9bc3c5ea559592bf6f02bb4f122d8. --- src/msmhelper/msm/tests.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/msmhelper/msm/tests.py b/src/msmhelper/msm/tests.py index fbacd82..1507b7f 100644 --- a/src/msmhelper/msm/tests.py +++ b/src/msmhelper/msm/tests.py @@ -8,9 +8,10 @@ # Copyright (c) 2019-2023, Daniel Nagel # All rights reserved. import decorit +import numba import numpy as np -from msmhelper import utils +from msmhelper import msm, utils from msmhelper.statetraj import LumpedStateTraj, StateTraj