# metric-learning-RKHS/utils.py at main · RamyaLab/metric-learning-RKHS · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
import cvxpy as cp
import numpy as np
from scipy.stats import ortho_group

from distributions import Distribution


def generate_y_true_from_triplets_under_kernel(
    triplets_train, triplets_tests, kernel, K, rng
) -> tuple:
    """
    Project triplets through ``kernel`` and label them with the metric ``K``.

    For every entry of ``triplets_tests`` the train/test pair is projected with
    ``kernel.generate_projection_triplets(triplets_train, triplets_test)``,
    which must return a ``(projected_train, projected_test)`` pair.  The
    projected train triplets kept for labelling are the ones returned by the
    last of these calls.

    Train labels are drawn stochastically (``deterministic=False``); test
    labels are computed deterministically (``deterministic=True``).

    Parameters
    ----------
    triplets_train : triplets passed as first argument to the kernel projection.
    triplets_tests : non-empty iterable of test triplet sets.
    kernel : object exposing ``generate_projection_triplets(train, test)``.
    K : matrix forwarded to ``generate_y_true_from_triplets``.
    rng : random generator forwarded to ``generate_y_true_from_triplets``.

    Returns
    -------
    tuple
        ``(y_train, y_tests, dists)`` where ``y_tests`` is a list with one
        label array per test set and ``dists`` is the second value returned by
        ``generate_y_true_from_triplets`` for the train triplets.

    Raises
    ------
    ValueError
        If ``triplets_tests`` is empty: the train projection is only produced
        jointly with a test set, so there would be nothing to label.
    """
    # Materialise once so generators are supported and emptiness can be checked.
    triplets_tests = list(triplets_tests)
    if not triplets_tests:
        raise ValueError(
            "triplets_tests must contain at least one test set; the train "
            "projection is computed jointly with a test set."
        )

    # project triplets into representation space
    varphi_triplets_train = None
    varphi_triplets_tests = []
    for triplets_test in triplets_tests:
        varphi_triplets_train, varphi_triplets_test = (
            kernel.generate_projection_triplets(triplets_train, triplets_test)
        )
        varphi_triplets_tests.append(varphi_triplets_test)

    # generate y from projected triplets (train first, so the rng stream is
    # consumed in the same order as before)
    varphi_y_true_train, dists = generate_y_true_from_triplets(
        K, varphi_triplets_train, rng, deterministic=False
    )
    varphi_y_true_tests = []
    for varphi_triplets_test in varphi_triplets_tests:
        varphi_y_true_test, _ = generate_y_true_from_triplets(
            K, varphi_triplets_test, rng, deterministic=True
        )
        varphi_y_true_tests.append(varphi_y_true_test)

    return varphi_y_true_train, varphi_y_true_tests, dists


def generate_all_triplets(n: int) -> np.ndarray:
    """
    Enumerate every triplet ``(i, j, k)`` over ``range(n)`` with ``i`` distinct
    from ``j`` and ``k`` and with ``j < k``.

    Candidates for ``j`` and ``k`` are visited in the iteration order of the
    underlying ``set`` objects, so the row order of the result follows that
    order.  When no triplet exists (``n < 3``) the result is ``np.array([])``.
    """
    enumerated = [
        (i, j, k)
        for i in range(n)
        for j in list(set(range(n)) - {i})
        for k in list(set(range(j + 1, n)) - {i})
    ]
    return np.array(enumerated)


def generate_triplets(
    rng: np.random.Generator,
    n: int,
    size: int = 10000,
    mode: str = "jain",
    distribution: "Distribution" = None,
) -> np.ndarray:
    """
    Sample ``size`` triplets.

    ``mode="jain"`` follows https://arxiv.org/pdf/1709.06171: each triplet
    ``(i, j, k)`` holds three distinct indices in ``range(n)`` with ``j < k``.
    ``i`` is drawn uniformly from ``range(n)`` and ``{j, k}`` is drawn without
    replacement from the remaining indices, i.e. uniformly from the full set
    of ``n * [(n - 1) choose 2]`` triplets.  The result has one row per
    triplet.

    ``mode="tatli"`` returns ``distribution.sample((size, 3))`` unchanged.  In
    this mode ``distribution`` must be provided and both ``rng`` and ``n`` must
    be ``None`` (enforced with assertions).

    Raises
    ------
    ValueError
        If ``mode`` is neither ``"jain"`` nor ``"tatli"``.
    """
    if mode == "jain":
        # Built once; the per-sample candidate list is derived from it exactly
        # as before so the random stream (and thus the output) is unchanged.
        all_indices = set(range(n))
        sampled = []
        for _ in range(size):
            i = rng.integers(0, n, size=1)[0]
            j, k = rng.choice(list(all_indices - {i}), size=2, replace=False)
            j, k = min(j, k), max(j, k)
            # Sanity check on the sampled triplet.  The range check is on the
            # sampled index ``i`` (it previously tested the literal 1).
            assert 0 <= i < n and i != j and j != k and i != k and j < k
            sampled.append((i, j, k))
        return np.array(sampled)
    elif mode == "tatli":
        assert distribution is not None
        assert rng is None
        assert n is None

        triplets_size = (size, 3)
        return distribution.sample(triplets_size)
    else:
        raise ValueError(f"Invalid mode: {mode}")


def generate_K_star(rng: np.random.Generator, p: int, d: int) -> dict:
    r"""
    Build the ground-truth metric K^\star of https://arxiv.org/pdf/1709.06171,

        K^\star = p / \sqrt{d} UU^\top,

    with U the first d columns of a random p x p orthogonal matrix.

    Returns a dict with keys:
      * "dense":  the matrix above,
      * "sparse": the same matrix with p - d randomly chosen indices zeroed
                  out in both rows and columns,
      * "U":      the p x d factor.
    """
    assert p >= d

    basis = ortho_group(dim=p, seed=rng).rvs()[:, :d]
    dense = p / np.sqrt(d) * basis @ basis.T

    # The sparse variant starts as an exact copy of the dense matrix; the
    # dropped indices are drawn after the orthogonal matrix.
    sparse = dense.copy()
    dropped = rng.choice(a=np.arange(p), replace=False, size=p - d)
    sparse[dropped, :] = 0
    sparse[:, dropped] = 0

    result = {"dense": dense, "sparse": sparse, "U": basis}
    return result


def M_t(K, x_i, x_j, x_k):
    """
    Return (x_i - x_j)^T K (x_i - x_j) - (x_i - x_k)^T K (x_i - x_k).

    A negative value means the quadratic form under ``K`` is smaller for the
    pair (i, j) than for the pair (i, k).
    """
    delta_ij = x_i - x_j
    delta_ik = x_i - x_k
    quad_ij = np.dot(delta_ij, np.dot(K, delta_ij))
    quad_ik = np.dot(delta_ik, np.dot(K, delta_ik))
    return quad_ij - quad_ik


def generate_y_true_from_triplets(
    K: np.ndarray,
    triplets: np.ndarray,
    rng: np.random.Generator,
    deterministic: bool = False,
) -> np.ndarray:
    """
    Label triplets of points with the metric ``K``.

    Each element of ``triplets`` unpacks into three points ``(x_i, x_j, x_k)``.
    With ``m = M_t(K, x_i, x_j, x_k)``:

    * deterministic: the label is -1 when ``m < 0`` and +1 otherwise;
    * otherwise: the label is -1 with probability ``1 / (1 + exp(m))`` (one
      ``rng.uniform()`` draw per triplet) and +1 otherwise.

    Returns a pair ``(labels, m_values)`` of arrays with one entry per triplet
    (note: a tuple, although the annotation names a single array).
    """
    labels, margins = [], []
    for point_i, point_j, point_k in triplets:
        margin = M_t(K, point_i, point_j, point_k)

        if deterministic:
            is_negative = margin < 0
        else:
            # rng is only consumed on the stochastic path
            is_negative = rng.uniform() < 1 / (1 + np.exp(margin))

        labels.append(-1 if is_negative else 1)
        margins.append(margin)

    return np.array(labels), np.array(margins)


def generate_y_true(
    K: np.ndarray,
    X: np.ndarray,
    triplets: np.ndarray,
    rng: np.random.Generator,
    deterministic: bool = False,
) -> np.ndarray:
    """
    Label index triplets with the metric ``K``.

    ``triplets`` holds indices into ``X``; ``X[triplets]`` yields, per triplet,
    the three points ``(x_i, x_j, x_k)``.  With ``m = M_t(K, x_i, x_j, x_k)``:

    * deterministic: the label is -1 when ``m < 0`` and +1 otherwise;
    * otherwise: the label is -1 with probability ``1 / (1 + exp(m))`` (one
      ``rng.uniform()`` draw per triplet) and +1 otherwise.

    Returns a pair ``(labels, m_values)`` of arrays with one entry per triplet
    (note: a tuple, although the annotation names a single array).
    """
    labels, margins = [], []
    for point_i, point_j, point_k in X[triplets]:
        margin = M_t(K, point_i, point_j, point_k)

        if deterministic:
            is_negative = margin < 0
        else:
            # rng is only consumed on the stochastic path
            is_negative = rng.uniform() < 1 / (1 + np.exp(margin))

        margins.append(margin)
        labels.append(-1 if is_negative else 1)

    return np.array(labels), np.array(margins)


def generate_y_from_K_and_triplets(K: np.ndarray, triplets: np.ndarray) -> np.ndarray:
    """
    Predict a label for each triplet of points under the metric ``K``.

    Each element of ``triplets`` unpacks into ``(x_i, x_j, x_k)``; the label is
    -1 when ``M_t(K, x_i, x_j, x_k) < 0`` and +1 otherwise.
    """
    predictions = [
        -1 if M_t(K, point_i, point_j, point_k) < 0 else 1
        for point_i, point_j, point_k in triplets
    ]
    return np.array(predictions)


def generate_y_from_K(K: np.ndarray, X: np.ndarray, triplets: np.ndarray) -> np.ndarray:
    """
    Predict a label for each index triplet under the metric ``K``.

    ``triplets`` holds indices into ``X``; for the points ``(x_i, x_j, x_k)``
    obtained from ``X[triplets]`` the label is -1 when
    ``M_t(K, x_i, x_j, x_k) < 0`` and +1 otherwise.
    """
    predictions = [
        -1 if M_t(K, point_i, point_j, point_k) < 0 else 1
        for point_i, point_j, point_k in X[triplets]
    ]
    return np.array(predictions)


def risk_fn(K, K_star, triplets, X) -> float:
    """
    Average cross-entropy between the response probabilities induced by
    ``K_star`` and those induced by ``K`` over the given index triplets.

    For each triplet ``(i, j, k)`` of row indices into ``X``:

        M(A) = (x_i - x_j)^T A (x_i - x_j) - (x_i - x_k)^T A (x_i - x_k)
        f(A) = 1 / (1 + exp(M(A)))

    and the per-triplet term is

        f(K_star) * log(1 / f(K)) + (1 - f(K_star)) * log(1 / (1 - f(K))),

    with 1e-10 added inside each logarithm's denominator for numerical safety.
    The terms are summed and divided by ``len(triplets)``.
    """

    def _quadratic_forms(matrix, diffs):
        # One value d^T matrix d per row d of ``diffs``.
        return np.sum(matrix @ diffs.T * diffs.T, axis=0)

    def _link(values):
        return 1 / (1 + np.exp(values))

    # Split the triplets into their three index columns.
    i_indices, j_indices, k_indices = (
        np.array(column) for column in zip(*triplets)
    )
    diff_i_j = X[i_indices] - X[j_indices]
    diff_i_k = X[i_indices] - X[k_indices]

    prob_K = _link(
        _quadratic_forms(K, diff_i_j) - _quadratic_forms(K, diff_i_k)
    )
    prob_K_star = _link(
        _quadratic_forms(K_star, diff_i_j) - _quadratic_forms(K_star, diff_i_k)
    )

    per_triplet = prob_K_star * np.log(1 / (prob_K + 1e-10)) + (
        1 - prob_K_star
    ) * np.log(1 / (1 - prob_K + 1e-10))

    total = np.sum(per_triplet)
    return total / len(triplets)


def relative_excess_risk_fn(K, K_star, triplets, X) -> float:
    """
    Excess risk of ``K`` over ``K_star``, relative to the risk of ``K_star``:

        (risk(K) - risk(K_star)) / (risk(K_star) + 1e-10)

    Both risks are evaluated with ``risk_fn`` on the same triplets and points.
    """
    baseline = risk_fn(K_star, K_star, triplets, X)
    candidate = risk_fn(K, K_star, triplets, X)
    excess = candidate - baseline
    return excess / (baseline + 1e-10)


def relative_squared_recovery_error_fn(K, K_star) -> float:
    """
    Return ||K - K_star||^2 / ||K_star||^2, using ``np.linalg.norm`` with its
    default norm for both terms.
    """
    error_norm = np.linalg.norm(K - K_star)
    reference_norm = np.linalg.norm(K_star)
    return error_norm ** 2 / reference_norm ** 2


def solve_for_K(
    p: int,
    triplets: np.ndarray,
    y_true: np.ndarray,
    gamma: float,
    lambda_: float,
    loss_type: str,
    constraint_type: str,
    solver: str,
    verbose: bool,
) -> tuple:
    """
    Fit a positive semidefinite p x p matrix to labelled triplets with cvxpy.

    Parameters
    ----------
    p : side length of the matrix variable.
    triplets : 3-D array indexed as ``triplets[:, r, :]`` with ``r`` in
        {0, 1, 2} selecting x_i, x_j, x_k; the last axis must have length
        ``p`` for the products with the p x p variable to be defined.
    y_true : labels multiplied element-wise with the per-triplet values
        (presumably +/-1, one per triplet -- confirm against callers).
    gamma : upper bound on ``max |M_t(K_hat)|`` over the triplets.
    lambda_ : upper bound on the chosen norm of ``K_hat``.
    loss_type : ``"logistic"`` or ``"hinge"``.
    constraint_type : ``"nuc"``, ``"fro"`` or ``"l_12"``.
    solver, verbose : forwarded to ``problem.solve``.

    Returns
    -------
    tuple
        ``(problem, K_hat.value)``: the ``cp.Problem`` that was solved and the
        value of the matrix variable after solving.

    Raises
    ------
    ValueError
        If ``loss_type`` or ``constraint_type`` is not one of the values above.
    """
    # Per-triplet difference vectors x_i - x_j and x_i - x_k.
    diff_i_j = triplets[:, 0, :] - triplets[:, 1, :]
    diff_i_k = triplets[:, 0, :] - triplets[:, 2, :]

    K_hat = cp.Variable((p, p), PSD=True)
    # Per triplet: d_ij^T K_hat d_ij - d_ik^T K_hat d_ik (column-wise sums).
    M_t_K_s = cp.sum(cp.multiply(K_hat @ diff_i_j.T, diff_i_j.T), axis=0) - cp.sum(
        cp.multiply(K_hat @ diff_i_k.T, diff_i_k.T), axis=0
    )

    # Empirical risk: average of the chosen loss over the triplets.
    if loss_type == "logistic":
        risk = cp.sum(cp.logistic(-1 * cp.multiply(y_true, M_t_K_s))) / len(triplets)
    elif loss_type == "hinge":
        risk = cp.sum(cp.pos(1 - cp.multiply(y_true, M_t_K_s))) / len(triplets)
    else:
        raise ValueError(f"Invalid loss_type: {loss_type}")

    # Norm constraint on K_hat, bounded by lambda_.
    if constraint_type == "nuc":
        constraints = [cp.norm(K_hat, p="nuc") <= lambda_]
    elif constraint_type == "fro":
        constraints = [cp.norm(K_hat, p="fro") <= lambda_]
    elif constraint_type == "l_12":
        constraints = [cp.mixed_norm(K_hat, p=2, q=1) <= lambda_]
    else:
        raise ValueError(f"Invalid constraint_type: {constraint_type}")
    # Additionally bound the magnitude of every per-triplet value by gamma.
    constraints.append(cp.max(cp.abs(M_t_K_s)) <= gamma)

    problem = cp.Problem(cp.Minimize(risk), constraints=constraints)

    problem.solve(solver=solver, verbose=verbose)

    return problem, K_hat.value