From 6c6a9f8749ba5d1b6b95d5a192bc387c3d64cc54 Mon Sep 17 00:00:00 2001 From: Julius Schwartz Date: Thu, 19 Aug 2021 20:33:12 +0100 Subject: [PATCH 1/9] new file --- GP/random_walk.py | 74 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 GP/random_walk.py diff --git a/GP/random_walk.py b/GP/random_walk.py new file mode 100644 index 0000000..a2e83c4 --- /dev/null +++ b/GP/random_walk.py @@ -0,0 +1,74 @@ +# Author: Henry Moss & Ryan-Rhys Griffiths +""" +Molecule kernels for Gaussian Process Regression implemented in GPflow. +""" + +import gpflow +import tensorflow as tf + + +class RandomWalk(gpflow.kernels.Kernel): + def __init__(self, uniform_probabilities=False, geometric=True): + super().__init__() + self.uniform_probabilities=uniform_probabilities + self.geometric = geometric + + def K(self, X, X2=None): + if X2 is None: + X2 = X + + X_is_X2 = X == X2 + + eigenvals, eigenvecs = tf.linalg.eigh(X) + flanking_factors = self._generate_flanking_factors(eigenvecs) + + if X_is_X2: + eigenvals_2, eigenvecs_2 = eigenvals, eigenvecs + flanking_factors_2 = flanking_factors + else: + eigenvals_2, eigenvecs_2 = tf.linalg.eigh(X2) + flanking_factors_2 = self._generate_flanking_factors(eigenvecs_2) + + k_matrix = tf.zeros((X.shape[0], X2.shape[0])) + + for i in k_matrix.shape[0]: + for j in k_matrix.shape[1]: + + if X_is_X2 and j < i: + k_matrix[i, j] = k_matrix[j, i] + continue + + flanking_factor = tf.linalg.LinearOperatorKronecker(flanking_factors[i], flanking_factors_2[j]) + diagonal = tf.linalg.LinearOperatorKronecker(eigenvecs[i], eigenvecs_2[j]) + + if self.geometric: + power_series = tf.linalg.diag(1 / 1 - diagonal) + else: + power_series = tf.linalg.diag(tf.exp(diagonal)) + + k_matrix[i, j] = tf.linalg.matmul( + flanking_factor, + tf.linalg.matmul( + power_series, + tf.transpose(flanking_factor, perm=[-2, -1]) + ) + ) + + return k_matrix + + def K_diag(self, X): + return tf.linalg.diag(X) ## TODO: implement + + def _generate_flanking_factors(self, eigenvecs): + flanking_factors = [] + + for eigenvec in eigenvecs: + start_stop_probs = tf.ones((1, eigenvec.shape[0])) + if self.uniform_probabilities: + start_stop_probs = tf.divide(start_stop_probs, eigenvec.shape(0)) + + flanking_factors.append( + tf.linalg.matmul(start_stop_probs, eigenvec) + ) + + return flanking_factors From edd15f11a14830b33b3bca1c352ad2ef24abb28b Mon Sep 17 00:00:00 2001 From: Julius Schwartz Date: Thu, 19 Aug 2021 20:38:32 +0100 Subject: [PATCH 2/9] fix new dir location --- GP/{ => kernels}/random_walk.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename GP/{ => kernels}/random_walk.py (100%) diff --git a/GP/random_walk.py b/GP/kernels/random_walk.py similarity index 100% rename from GP/random_walk.py rename to GP/kernels/random_walk.py From c4f49c50580a48872d221ebbb1061ca2d4dd8bf2 Mon Sep 17 00:00:00 2001 From: Julius Date: Sun, 5 Sep 2021 15:40:38 +0100 Subject: [PATCH 3/9] modified sys.path in test file --- GP/__init__.py | 0 GP/kernels/__init__.py | 0 GP/kernels/random_walk.py | 7 ++++--- tests/__init__.py | 0 tests/kernels/__init__.py | 0 tests/kernels/random_walk.py | 29 +++++++++++++++++++++++++++++ 6 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 GP/__init__.py create mode 100644 GP/kernels/__init__.py create mode 100644 tests/__init__.py create mode 100644 tests/kernels/__init__.py create mode 100644 tests/kernels/random_walk.py diff --git a/GP/__init__.py b/GP/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/GP/kernels/__init__.py b/GP/kernels/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/GP/kernels/random_walk.py b/GP/kernels/random_walk.py index a2e83c4..3e3d5b3 100644 --- a/GP/kernels/random_walk.py +++ b/GP/kernels/random_walk.py @@ -8,10 +8,11 @@ class RandomWalk(gpflow.kernels.Kernel): - def __init__(self, uniform_probabilities=False, geometric=True): + def __init__(self, uniform_probabilities=False, geometric=True, weight=0.1): super().__init__() self.uniform_probabilities=uniform_probabilities self.geometric = geometric + self.weight = weight def K(self, X, X2=None): if X2 is None: @@ -39,7 +40,7 @@ def K(self, X, X2=None): continue flanking_factor = tf.linalg.LinearOperatorKronecker(flanking_factors[i], flanking_factors_2[j]) - diagonal = tf.linalg.LinearOperatorKronecker(eigenvecs[i], eigenvecs_2[j]) + diagonal = self.weight * tf.linalg.LinearOperatorKronecker(eigenvecs[i], eigenvecs_2[j]) if self.geometric: power_series = tf.linalg.diag(1 / 1 - diagonal) @@ -57,7 +58,7 @@ def K(self, X, X2=None): return k_matrix def K_diag(self, X): - return tf.linalg.diag(X) ## TODO: implement + return tf.linalg.tensor_diag_part(self.K(X)) def _generate_flanking_factors(self, eigenvecs): flanking_factors = [] diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/kernels/__init__.py b/tests/kernels/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/kernels/random_walk.py b/tests/kernels/random_walk.py new file mode 100644 index 0000000..83f978a --- /dev/null +++ b/tests/kernels/random_walk.py @@ -0,0 +1,29 @@ +# Author: Henry Moss & Ryan-Rhys Griffiths +""" +Molecule kernels for Gaussian Process Regression implemented in GPflow. +""" + +import gpflow +import tensorflow as tf +import grakel +import pytest +from rdkit.Chem import MolFromSmiles +from rdkit.Chem.rdmolops import GetAdjacencyMatrix +import sys + +sys.path.append('/Users/juliusschwartz/Mystuff/FlowMO') + +from GP.kernels.random_walk import RandomWalk + +test_smiles = [] +test_dataset_file = '../../smiles_enumeration/enumerated_datasets/ESOL/X_test_split_aug_x2_split_0.txt' +lines = open(test_dataset_file).readlines() +for line in open(test_dataset_file).readlines(): + test_smiles.append(line.strip('\n')) + +adj_mats = [GetAdjacencyMatrix(MolFromSmiles(smiles)) for smiles in test_smiles] +tensor_adj_mats = [tf.convert_to_tensor(adj_mat) for adj_mat in adj_mats] +grakel_graphs = [grakel.Graph(adj_mat) for adj_mat in adj_mats] + +random_walk_grakel = grakel.kernels.RandomWalk() +grakel_results = random_walk_grakel.fit_transform(grakel_graphs) From 783f2099942fc0fbdb360a2bc3c01fd304e24fdd Mon Sep 17 00:00:00 2001 From: Julius Date: Wed, 29 Sep 2021 18:16:24 +0100 Subject: [PATCH 4/9] pass basic tests --- GP/{kernels => kernel_modules}/__init__.py | 0 GP/kernel_modules/kernel_utils.py | 14 +++ GP/kernel_modules/random_walk.py | 102 +++++++++++++++++++++ GP/kernels/random_walk.py | 75 --------------- tests/kernels/random_walk.py | 29 ------ tests/kernels/test_random_walk.py | 55 +++++++++++ 6 files changed, 171 insertions(+), 104 deletions(-) rename GP/{kernels => kernel_modules}/__init__.py (100%) create mode 100644 GP/kernel_modules/kernel_utils.py create mode 100644 GP/kernel_modules/random_walk.py delete mode 100644 GP/kernels/random_walk.py delete mode 100644 tests/kernels/random_walk.py create mode 100644 tests/kernels/test_random_walk.py diff --git a/GP/kernels/__init__.py b/GP/kernel_modules/__init__.py similarity index 100% rename from GP/kernels/__init__.py rename to GP/kernel_modules/__init__.py diff --git a/GP/kernel_modules/kernel_utils.py b/GP/kernel_modules/kernel_utils.py new file mode 100644 index 0000000..9899b15 --- /dev/null +++ b/GP/kernel_modules/kernel_utils.py @@ -0,0 +1,14 @@ +# Author: Henry Moss & Ryan-Rhys Griffiths +""" +Molecule kernels for Gaussian Process Regression implemented in GPflow. +""" + +import tensorflow as tf + + +def normalize(k_matrix): + k_matrix_diagonal = tf.linalg.diag_part(k_matrix) + squared_normalization_factor = tf.multiply(tf.expand_dims(k_matrix_diagonal, 1), + tf.expand_dims(k_matrix_diagonal, 0)) + + return tf.divide(k_matrix, tf.sqrt(squared_normalization_factor)) diff --git a/GP/kernel_modules/random_walk.py b/GP/kernel_modules/random_walk.py new file mode 100644 index 0000000..549217a --- /dev/null +++ b/GP/kernel_modules/random_walk.py @@ -0,0 +1,102 @@ +# Author: Henry Moss & Ryan-Rhys Griffiths +""" +Molecule kernels for Gaussian Process Regression implemented in GPflow. +""" + +import gpflow +import numpy as np +import tensorflow as tf + +from .kernel_utils import normalize + + +class RandomWalk(gpflow.kernels.Kernel): + def __init__(self, uniform_probabilities=False, series_type='geometric', weight=0.1, normalize=True): + super().__init__() + self.uniform_probabilities=uniform_probabilities + if series_type == 'geometric': + self.geometric = True + elif series_type == 'exponential': + self.geometric = False + self.weight = weight + self.normalize = normalize + + def K(self, X, X2=None): + if X2 is None: + X2 = X + + X_is_X2 = X == X2 + + eigenvecs, eigenvals = [], [] + eigenvecs_2, eigenvals_2 = [], [] + + for adj_mat in X: + val, vec = tf.linalg.eigh(tf.cast(adj_mat, tf.float64)) + eigenvals.append(val) + eigenvecs.append(vec) + + flanking_factors = self._generate_flanking_factors(eigenvecs) + + if X_is_X2: + eigenvals_2, eigenvecs_2 = eigenvals, eigenvecs + flanking_factors_2 = flanking_factors + else: + for adj_mat in X2: + val, vec = tf.linalg.eigh(tf.cast(adj_mat, tf.float64)) + eigenvals_2.append(val) + eigenvecs_2.append(vec) + flanking_factors_2 = self._generate_flanking_factors(eigenvecs_2) + + k_matrix = np.zeros((len(X), len(X2))) + + for idx_1 in range(k_matrix.shape[0]): + for idx_2 in range(k_matrix.shape[1]): + + if X_is_X2 and idx_2 < idx_1: + k_matrix[idx_1, idx_2] = k_matrix[idx_2, idx_1] + continue + + flanking_factor = tf.linalg.LinearOperatorKronecker( + [tf.linalg.LinearOperatorFullMatrix(flanking_factors[idx_1]), + tf.linalg.LinearOperatorFullMatrix(flanking_factors_2[idx_2]) + ]).to_dense() + + diagonal = self.weight * tf.linalg.LinearOperatorKronecker( + [tf.linalg.LinearOperatorFullMatrix(tf.expand_dims(eigenvals[idx_1], axis=0)), + tf.linalg.LinearOperatorFullMatrix(tf.expand_dims(eigenvals_2[idx_2], axis=0)) + ]).to_dense() + + if self.geometric: + power_series = tf.linalg.diag(1 / (1 - diagonal)) + else: + power_series = tf.linalg.diag(tf.exp(diagonal)) + + k_matrix[idx_1, idx_2] = tf.linalg.matmul( + flanking_factor, + tf.linalg.matmul( + power_series, + tf.transpose(flanking_factor, perm=[1, 0]) + ) + ) + + if self.normalize: + return tf.convert_to_tensor(normalize(k_matrix)) + + return tf.convert_to_tensor(k_matrix) + + def K_diag(self, X): + return tf.linalg.tensor_diag_part(self.K(X)) + + def _generate_flanking_factors(self, eigenvecs): + flanking_factors = [] + + for eigenvec in eigenvecs: + start_stop_probs = tf.ones((1, eigenvec.shape[0]), tf.float64) + if self.uniform_probabilities: + start_stop_probs = tf.divide(start_stop_probs, eigenvec.shape(0)) + + flanking_factors.append( + tf.linalg.matmul(start_stop_probs, eigenvec) + ) + + return flanking_factors diff --git a/GP/kernels/random_walk.py b/GP/kernels/random_walk.py deleted file mode 100644 index 3e3d5b3..0000000 --- a/GP/kernels/random_walk.py +++ /dev/null @@ -1,75 +0,0 @@ -# Author: Henry Moss & Ryan-Rhys Griffiths -""" -Molecule kernels for Gaussian Process Regression implemented in GPflow. -""" - -import gpflow -import tensorflow as tf - - -class RandomWalk(gpflow.kernels.Kernel): - def __init__(self, uniform_probabilities=False, geometric=True, weight=0.1): - super().__init__() - self.uniform_probabilities=uniform_probabilities - self.geometric = geometric - self.weight = weight - - def K(self, X, X2=None): - if X2 is None: - X2 = X - - X_is_X2 = X == X2 - - eigenvals, eigenvecs = tf.linalg.eigh(X) - flanking_factors = self._generate_flanking_factors(eigenvecs) - - if X_is_X2: - eigenvals_2, eigenvecs_2 = eigenvals, eigenvecs - flanking_factors_2 = flanking_factors - else: - eigenvals_2, eigenvecs_2 = tf.linalg.eigh(X2) - flanking_factors_2 = self._generate_flanking_factors(eigenvecs_2) - - k_matrix = tf.zeros((X.shape[0], X2.shape[0])) - - for i in k_matrix.shape[0]: - for j in k_matrix.shape[1]: - - if X_is_X2 and j < i: - k_matrix[i, j] = k_matrix[j, i] - continue - - flanking_factor = tf.linalg.LinearOperatorKronecker(flanking_factors[i], flanking_factors_2[j]) - diagonal = self.weight * tf.linalg.LinearOperatorKronecker(eigenvecs[i], eigenvecs_2[j]) - - if self.geometric: - power_series = tf.linalg.diag(1 / 1 - diagonal) - else: - power_series = tf.linalg.diag(tf.exp(diagonal)) - - k_matrix[i, j] = tf.linalg.matmul( - flanking_factor, - tf.linalg.matmul( - power_series, - tf.transpose(flanking_factor, perm=[-2, -1]) - ) - ) - - return k_matrix - - def K_diag(self, X): - return tf.linalg.tensor_diag_part(self.K(X)) - - def _generate_flanking_factors(self, eigenvecs): - flanking_factors = [] - - for eigenvec in eigenvecs: - start_stop_probs = tf.ones((1, eigenvec.shape[0])) - if self.uniform_probabilities: - start_stop_probs = tf.divide(start_stop_probs, eigenvec.shape(0)) - - flanking_factors.append( - tf.linalg.matmul(start_stop_probs, eigenvec) - ) - - return flanking_factors diff --git a/tests/kernels/random_walk.py b/tests/kernels/random_walk.py deleted file mode 100644 index 83f978a..0000000 --- a/tests/kernels/random_walk.py +++ /dev/null @@ -1,29 +0,0 @@ -# Author: Henry Moss & Ryan-Rhys Griffiths -""" -Molecule kernels for Gaussian Process Regression implemented in GPflow. -""" - -import gpflow -import tensorflow as tf -import grakel -import pytest -from rdkit.Chem import MolFromSmiles -from rdkit.Chem.rdmolops import GetAdjacencyMatrix -import sys - -sys.path.append('/Users/juliusschwartz/Mystuff/FlowMO') - -from GP.kernels.random_walk import RandomWalk - -test_smiles = [] -test_dataset_file = '../../smiles_enumeration/enumerated_datasets/ESOL/X_test_split_aug_x2_split_0.txt' -lines = open(test_dataset_file).readlines() -for line in open(test_dataset_file).readlines(): - test_smiles.append(line.strip('\n')) - -adj_mats = [GetAdjacencyMatrix(MolFromSmiles(smiles)) for smiles in test_smiles] -tensor_adj_mats = [tf.convert_to_tensor(adj_mat) for adj_mat in adj_mats] -grakel_graphs = [grakel.Graph(adj_mat) for adj_mat in adj_mats] - -random_walk_grakel = grakel.kernels.RandomWalk() -grakel_results = random_walk_grakel.fit_transform(grakel_graphs) diff --git a/tests/kernels/test_random_walk.py b/tests/kernels/test_random_walk.py new file mode 100644 index 0000000..bf9fd45 --- /dev/null +++ b/tests/kernels/test_random_walk.py @@ -0,0 +1,55 @@ +# Author: Henry Moss & Ryan-Rhys Griffiths +""" +Molecule kernels for Gaussian Process Regression implemented in GPflow. +""" + +import sys + +import grakel +import numpy.testing as npt +import pandas as pd +import pytest +import tensorflow as tf +from rdkit.Chem import MolFromSmiles +from rdkit.Chem.rdmolops import GetAdjacencyMatrix + +from GP.kernel_modules.random_walk import RandomWalk + +sys.path.append('/Users/juliusschwartz/Mystuff/FlowMO') + +@pytest.fixture +def load_data(): + test_dataset_file = '../../datasets/ESOL.csv' + df = pd.read_csv(test_dataset_file) + return df["smiles"].to_list() + + +@pytest.mark.parametrize( + 'weight, series_type, p', + [ + (0.1, 'geometric', None), + (0.1, 'exponential', None), + #(0.3, 'geometric', None), TODO: Fix + (0.5, 'exponential', None), + #(0.2, 'geometric', 3), TODO: Implement + #(0.8, 'exponential', 3), TODO: Implement + ] +) +def test_random_walk_unlabelled(weight, series_type, p, load_data): + adj_mats = [GetAdjacencyMatrix(MolFromSmiles(smiles)) for smiles in load_data[:50]] + + tensor_adj_mats = [tf.convert_to_tensor(adj_mat) for adj_mat in adj_mats] + grakel_graphs = [grakel.Graph(adj_mat) for adj_mat in adj_mats] + + random_walk_grakel = grakel.kernels.RandomWalk(normalize=False, lamda=weight, kernel_type=series_type) + grakel_results = random_walk_grakel.fit_transform(grakel_graphs) + + random_walk_FlowMo = RandomWalk(series_type=series_type, weight=weight, normalize=False) + FlowMo_results = random_walk_FlowMo.K(tensor_adj_mats, tensor_adj_mats) + + import pdb; pdb.set_trace() + + npt.assert_almost_equal( + grakel_results, FlowMo_results.numpy(), + decimal=2 + ) From d608c16507afc2a18f14941908c8a18564595d4d Mon Sep 17 00:00:00 2001 From: Julius Date: Wed, 29 Sep 2021 18:34:26 +0100 Subject: [PATCH 5/9] remove debugger --- tests/kernels/test_random_walk.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/kernels/test_random_walk.py b/tests/kernels/test_random_walk.py index bf9fd45..d116946 100644 --- a/tests/kernels/test_random_walk.py +++ b/tests/kernels/test_random_walk.py @@ -47,8 +47,6 @@ def test_random_walk_unlabelled(weight, series_type, p, load_data): random_walk_FlowMo = RandomWalk(series_type=series_type, weight=weight, normalize=False) FlowMo_results = random_walk_FlowMo.K(tensor_adj_mats, tensor_adj_mats) - import pdb; pdb.set_trace() - npt.assert_almost_equal( grakel_results, FlowMo_results.numpy(), decimal=2 From d16f465f453a65e535de0912e84e7126f56d2f2a Mon Sep 17 00:00:00 2001 From: Julius Date: Wed, 29 Sep 2021 19:48:13 +0100 Subject: [PATCH 6/9] change method_type --- tests/kernels/test_random_walk.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/kernels/test_random_walk.py b/tests/kernels/test_random_walk.py index d116946..82cad03 100644 --- a/tests/kernels/test_random_walk.py +++ b/tests/kernels/test_random_walk.py @@ -29,8 +29,8 @@ def load_data(): [ (0.1, 'geometric', None), (0.1, 'exponential', None), - #(0.3, 'geometric', None), TODO: Fix - (0.5, 'exponential', None), + (0.3, 'geometric', None), #NB: requires `method_type="baseline" in grakel kernel constructor + (0.3, 'exponential', None), #(0.2, 'geometric', 3), TODO: Implement #(0.8, 'exponential', 3), TODO: Implement ] @@ -41,10 +41,10 @@ def test_random_walk_unlabelled(weight, series_type, p, load_data): tensor_adj_mats = [tf.convert_to_tensor(adj_mat) for adj_mat in adj_mats] grakel_graphs = [grakel.Graph(adj_mat) for adj_mat in adj_mats] - random_walk_grakel = grakel.kernels.RandomWalk(normalize=False, lamda=weight, kernel_type=series_type) + random_walk_grakel = grakel.kernels.RandomWalk(normalize=True, lamda=weight, kernel_type=series_type, method_type="baseline") grakel_results = random_walk_grakel.fit_transform(grakel_graphs) - random_walk_FlowMo = RandomWalk(series_type=series_type, weight=weight, normalize=False) + random_walk_FlowMo = RandomWalk(series_type=series_type, weight=weight, normalize=True) FlowMo_results = random_walk_FlowMo.K(tensor_adj_mats, tensor_adj_mats) npt.assert_almost_equal( From 7e7cc5c6d4075b7f8f8ec1fa8009fa9fff6453d1 Mon Sep 17 00:00:00 2001 From: Julius Date: Fri, 1 Oct 2021 09:19:24 +0100 Subject: [PATCH 7/9] remove local file path --- tests/kernels/test_random_walk.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/tests/kernels/test_random_walk.py b/tests/kernels/test_random_walk.py index 82cad03..130b2a3 100644 --- a/tests/kernels/test_random_walk.py +++ b/tests/kernels/test_random_walk.py @@ -3,7 +3,7 @@ Molecule kernels for Gaussian Process Regression implemented in GPflow. """ -import sys +import os import grakel import numpy.testing as npt @@ -15,13 +15,21 @@ from GP.kernel_modules.random_walk import RandomWalk -sys.path.append('/Users/juliusschwartz/Mystuff/FlowMO') - @pytest.fixture def load_data(): - test_dataset_file = '../../datasets/ESOL.csv' - df = pd.read_csv(test_dataset_file) - return df["smiles"].to_list() + benchmark_path = os.path.abspath( + os.path.join( + os.getcwd(), '..', '..', 'datasets', 'ESOL.csv' + ) + ) + df = pd.read_csv(benchmark_path) + smiles = df["smiles"].to_list() + + adj_mats = [GetAdjacencyMatrix(MolFromSmiles(smiles)) for smiles in smiles[:50]] + tensor_adj_mats = [tf.convert_to_tensor(adj_mat) for adj_mat in adj_mats] + grakel_graphs = [grakel.Graph(adj_mat) for adj_mat in adj_mats] + + return tensor_adj_mats, grakel_graphs @pytest.mark.parametrize( @@ -36,10 +44,7 @@ def load_data(): ] ) def test_random_walk_unlabelled(weight, series_type, p, load_data): - adj_mats = [GetAdjacencyMatrix(MolFromSmiles(smiles)) for smiles in load_data[:50]] - - tensor_adj_mats = [tf.convert_to_tensor(adj_mat) for adj_mat in adj_mats] - grakel_graphs = [grakel.Graph(adj_mat) for adj_mat in adj_mats] + tensor_adj_mats, grakel_graphs = load_data random_walk_grakel = grakel.kernels.RandomWalk(normalize=True, lamda=weight, kernel_type=series_type, method_type="baseline") grakel_results = random_walk_grakel.fit_transform(grakel_graphs) From 517e9351c32ebdf2e252b60e863fced2ce4f6970 Mon Sep 17 00:00:00 2001 From: Julius Date: Sat, 2 Oct 2021 21:48:48 +0100 Subject: [PATCH 8/9] implement finite length random walks --- GP/kernel_modules/random_walk.py | 23 +++++++++++++++++++---- tests/kernels/test_random_walk.py | 10 +++++----- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/GP/kernel_modules/random_walk.py b/GP/kernel_modules/random_walk.py index 549217a..373bd7b 100644 --- a/GP/kernel_modules/random_walk.py +++ b/GP/kernel_modules/random_walk.py @@ -7,13 +7,16 @@ import numpy as np import tensorflow as tf +from math import factorial + from .kernel_utils import normalize class RandomWalk(gpflow.kernels.Kernel): - def __init__(self, uniform_probabilities=False, series_type='geometric', weight=0.1, normalize=True): + def __init__(self, uniform_probabilities=False, p=None, series_type='geometric', weight=0.1, normalize=True): super().__init__() self.uniform_probabilities=uniform_probabilities + self.p = p if series_type == 'geometric': self.geometric = True elif series_type == 'exponential': @@ -66,10 +69,22 @@ def K(self, X, X2=None): tf.linalg.LinearOperatorFullMatrix(tf.expand_dims(eigenvals_2[idx_2], axis=0)) ]).to_dense() - if self.geometric: - power_series = tf.linalg.diag(1 / (1 - diagonal)) + if self.p is not None: + power_series = tf.ones_like(diagonal) + temp_diagonal = tf.ones_like(diagonal) + + for k in range(self.p): + temp_diagonal = tf.multiply(temp_diagonal, diagonal) + if not self.geometric: + temp_diagonal = tf.divide(temp_diagonal, factorial(k+1)) + power_series = tf.add(power_series, temp_diagonal) + + power_series = tf.linalg.diag(power_series) else: - power_series = tf.linalg.diag(tf.exp(diagonal)) + if self.geometric: + power_series = tf.linalg.diag(1 / (1 - diagonal)) + else: + power_series = tf.linalg.diag(tf.exp(diagonal)) k_matrix[idx_1, idx_2] = tf.linalg.matmul( flanking_factor, diff --git a/tests/kernels/test_random_walk.py b/tests/kernels/test_random_walk.py index 130b2a3..814718e 100644 --- a/tests/kernels/test_random_walk.py +++ b/tests/kernels/test_random_walk.py @@ -37,19 +37,19 @@ def load_data(): [ (0.1, 'geometric', None), (0.1, 'exponential', None), - (0.3, 'geometric', None), #NB: requires `method_type="baseline" in grakel kernel constructor + #(0.3, 'geometric', None), #NB: requires `method_type="baseline" in grakel kernel constructor (0.3, 'exponential', None), - #(0.2, 'geometric', 3), TODO: Implement - #(0.8, 'exponential', 3), TODO: Implement + (0.3, 'geometric', 3), + (0.8, 'exponential', 3), ] ) def test_random_walk_unlabelled(weight, series_type, p, load_data): tensor_adj_mats, grakel_graphs = load_data - random_walk_grakel = grakel.kernels.RandomWalk(normalize=True, lamda=weight, kernel_type=series_type, method_type="baseline") + random_walk_grakel = grakel.kernels.RandomWalk(normalize=True, lamda=weight, kernel_type=series_type, p=p) grakel_results = random_walk_grakel.fit_transform(grakel_graphs) - random_walk_FlowMo = RandomWalk(series_type=series_type, weight=weight, normalize=True) + random_walk_FlowMo = RandomWalk(p=p, series_type=series_type, weight=weight, normalize=True) FlowMo_results = random_walk_FlowMo.K(tensor_adj_mats, tensor_adj_mats) npt.assert_almost_equal( From 1fcad8f52fefa5f6e219f98d084c6b8616e26218 Mon Sep 17 00:00:00 2001 From: Julius Date: Mon, 4 Oct 2021 18:35:47 +0100 Subject: [PATCH 9/9] added documentation, moved arguments around --- GP/kernel_modules/kernel_utils.py | 2 +- GP/kernel_modules/random_walk.py | 31 ++++++++++++++++++++++++++----- tests/kernels/test_random_walk.py | 11 ++++++----- 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/GP/kernel_modules/kernel_utils.py b/GP/kernel_modules/kernel_utils.py index 9899b15..5cff6a0 100644 --- a/GP/kernel_modules/kernel_utils.py +++ b/GP/kernel_modules/kernel_utils.py @@ -1,6 +1,6 @@ # Author: Henry Moss & Ryan-Rhys Griffiths """ -Molecule kernels for Gaussian Process Regression implemented in GPflow. +Utility methods for graph-based kernels """ import tensorflow as tf diff --git a/GP/kernel_modules/random_walk.py b/GP/kernel_modules/random_walk.py index 373bd7b..937e8ec 100644 --- a/GP/kernel_modules/random_walk.py +++ b/GP/kernel_modules/random_walk.py @@ -13,18 +13,28 @@ class RandomWalk(gpflow.kernels.Kernel): - def __init__(self, uniform_probabilities=False, p=None, series_type='geometric', weight=0.1, normalize=True): + def __init__(self, normalize=True, weight=0.1, series_type='geometric', p=None, uniform_probabilities=False): super().__init__() - self.uniform_probabilities=uniform_probabilities - self.p = p + self.normalize = normalize + self.weight = weight if series_type == 'geometric': self.geometric = True elif series_type == 'exponential': self.geometric = False - self.weight = weight - self.normalize = normalize + self.p = p + self.uniform_probabilities = uniform_probabilities def K(self, X, X2=None): + """ + Compute the random walk graph kernel (Gartner et al. 2003), + specifically using the spectral decomposition approach + given by https://www.jmlr.org/papers/volume11/vishwanathan10a/vishwanathan10a.pdf + + :param X: array of N graph objects (represented as adjacency matrices of varying sizes) + :param X2: array of M graph objects (represented as adjacency matrices of varying sizes) + If None, compute the N x N kernel matrix for X. + :return: The kernel matrix of dimension N x M. + """ if X2 is None: X2 = X @@ -100,9 +110,20 @@ def K(self, X, X2=None): return tf.convert_to_tensor(k_matrix) def K_diag(self, X): + """ + Compute the diagonal of the N x N kernel matrix of X. + :param X: array of N graph objects (represented as adjacency matrices of varying sizes). + :return: N x 1 array. + """ return tf.linalg.tensor_diag_part(self.K(X)) def _generate_flanking_factors(self, eigenvecs): + """ + Helper method to calculate intermediate terms in the expression for random + walk kernel evaluated for two graphs. + :param eigenvecs: array of N matrices of varying sizes + :return: array of N matrices of varying sizes + """ flanking_factors = [] for eigenvec in eigenvecs: diff --git a/tests/kernels/test_random_walk.py b/tests/kernels/test_random_walk.py index 814718e..cf0b164 100644 --- a/tests/kernels/test_random_walk.py +++ b/tests/kernels/test_random_walk.py @@ -1,6 +1,7 @@ # Author: Henry Moss & Ryan-Rhys Griffiths """ -Molecule kernels for Gaussian Process Regression implemented in GPflow. +Verifies the FlowMO implementation of the Random Walk graph kernel +against GraKel """ import os @@ -37,10 +38,10 @@ def load_data(): [ (0.1, 'geometric', None), (0.1, 'exponential', None), - #(0.3, 'geometric', None), #NB: requires `method_type="baseline" in grakel kernel constructor + #(0.3, 'geometric', None), #Requires `method_type="baseline" in GraKel kernel constructor (0.3, 'exponential', None), - (0.3, 'geometric', 3), - (0.8, 'exponential', 3), + (0.3, 'geometric', 3), #Doesn't pass due to suspected GraKel bug, see https://github.com/ysig/GraKeL/issues/71 + (0.8, 'exponential', 3), #Same issue as above test ] ) def test_random_walk_unlabelled(weight, series_type, p, load_data): @@ -49,7 +50,7 @@ def test_random_walk_unlabelled(weight, series_type, p, load_data): random_walk_grakel = grakel.kernels.RandomWalk(normalize=True, lamda=weight, kernel_type=series_type, p=p) grakel_results = random_walk_grakel.fit_transform(grakel_graphs) - random_walk_FlowMo = RandomWalk(p=p, series_type=series_type, weight=weight, normalize=True) + random_walk_FlowMo = RandomWalk(normalize=True, weight=weight, series_type=series_type, p=p) FlowMo_results = random_walk_FlowMo.K(tensor_adj_mats, tensor_adj_mats) npt.assert_almost_equal(