From 07cfc9e0c9ae4847d3c3ecb21c6e9af2ed25f5f6 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Sat, 11 Jul 2020 18:34:20 -0400 Subject: [PATCH 01/26] commit before switch --- selectinf/randomized/tests/test_approx_reference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/selectinf/randomized/tests/test_approx_reference.py b/selectinf/randomized/tests/test_approx_reference.py index fbf57dd13..1832b7cbe 100644 --- a/selectinf/randomized/tests/test_approx_reference.py +++ b/selectinf/randomized/tests/test_approx_reference.py @@ -221,7 +221,7 @@ def main(nsim=300, CI = False): p=100, signal_fac=1., s=5, - sigma=2., + sigma=3., rho=0.4, randomizer_scale=1.) @@ -232,4 +232,4 @@ def main(nsim=300, CI = False): print("iteration completed ", n + 1) if __name__ == "__main__": - main(nsim=20, CI = True) + main(nsim=40, CI = False) From 58817cbea409687d1af60f64c289840359789f13 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Sun, 12 Jul 2020 16:20:08 -0400 Subject: [PATCH 02/26] added solver for multitasking lasso: V0 --- selectinf/randomized/multitask_lasso.py | 123 ++++++++++++++++++ .../randomized/tests/test_multitask_lasso.py | 33 +++++ 2 files changed, 156 insertions(+) create mode 100644 selectinf/randomized/multitask_lasso.py create mode 100644 selectinf/randomized/tests/test_multitask_lasso.py diff --git a/selectinf/randomized/multitask_lasso.py b/selectinf/randomized/multitask_lasso.py new file mode 100644 index 000000000..9ca858514 --- /dev/null +++ b/selectinf/randomized/multitask_lasso.py @@ -0,0 +1,123 @@ +import numpy as np + +import regreg.api as rr +from .query import gaussian_query +from .randomization import randomization + +class multi_task_lasso(gaussian_query): + + def __init__(self, + loglikes, + feature_weight, + ridge_terms, + randomizers, + nfeature, + ntask, + perturbations=None): + + self.loglikes = loglikes + + self.ntask = ntask + self.nfeature = nfeature + + self.ridge_terms = ridge_terms + self.feature_weight = feature_weight + + self._initial_omega = perturbations + self.randomizers = randomizers + + def _solve_randomized_problem(self, + penalty, + perturbations=None, + solve_args={'tol': 1.e-12, 'min_its': 50}): + + if perturbations is not None: + self._initial_omega = perturbations + + if self._initial_omega is None: + self._initial_omega = np.array([self.randomizers[i].sample() for i in range(self.ntask)]).T + + quad_list = [rr.identity_quadratic(self.ridge_terms[i], + 0, + -self._initial_omega[:, i], + 0) + for i in range(self.ntask)] + + problem_list = [rr.simple_problem(self.loglikes[i], penalty) for i in range(self.ntask)] + + initial_solns = np.array([problem_list[i].solve(quad_list[i], **solve_args) for i in range(self.ntask)]) + initial_subgrads = np.array([-(self.loglikes[i].smooth_objective(initial_solns[i, :], + 'grad') + + quad_list[i].objective(initial_solns[i, :], 'grad')) + for i in range(self.ntask)]) + + return initial_solns, initial_subgrads + + def multitasking_solver(self, num_iter=50): + + penalty_prev = rr.weighted_l1norm(self.feature_weight, lagrange=1.) + solution_prev = self._solve_randomized_problem(penalty= penalty_prev) + beta = solution_prev[0].T + + for iteration in range(num_iter): + + sum_all_tasks = np.sum(np.absolute(beta), axis=1) + penalty_weight = 1. / np.maximum(np.sqrt(sum_all_tasks), 10 ** -10) + + feature_weight_current = self.feature_weight * penalty_weight + + penalty_current = rr.weighted_l1norm(feature_weight_current, lagrange=1.) + + solution_current = self._solve_randomized_problem(penalty=penalty_current) + + beta = solution_current[0].T + + subgrad = solution_current[1].T + + return beta, subgrad + + + @staticmethod + def gaussian(predictor_vars, + response_vars, + feature_weight, + noise_levels=None, + quadratic=None, + ridge_term=None, + randomizer_scales=None): + + ntask = len(response_vars) + + if noise_levels is None: + noise_levels = np.ones(ntask) + + loglikes = {i: rr.glm.gaussian(predictor_vars[i], response_vars[i], coef=1. / noise_levels[i] ** 2, quadratic=quadratic) for i in range(ntask)} + + sample_sizes = np.asarray([predictor_vars[i].shape[0] for i in range(ntask)]) + nfeatures = [predictor_vars[i].shape[1] for i in range(ntask)] + + if all(x == nfeatures[0] for x in nfeatures) == False: + raise ValueError("all the predictor matrices must have the same regression dimensions") + else: + nfeature = nfeatures[0] + + if ridge_term is None: + ridge_terms = np.zeros(ntask) + + mean_diag_list = [np.mean((predictor_vars[i] ** 2).sum(0)) for i in range(ntask)] + if randomizer_scales is None: + randomizer_scales = np.asarray([np.sqrt(mean_diag_list[i]) * 0.5 * np.std(response_vars[i]) + * np.sqrt(sample_sizes[i] / (sample_sizes[i] - 1.)) for i in range(ntask)]) + + randomizers = {i: randomization.isotropic_gaussian((nfeature,), randomizer_scales[i]) for i in range(ntask)} + + return multi_task_lasso(loglikes, + np.asarray(feature_weight), + ridge_terms, + randomizers, + nfeature, + ntask) + + + + diff --git a/selectinf/randomized/tests/test_multitask_lasso.py b/selectinf/randomized/tests/test_multitask_lasso.py new file mode 100644 index 000000000..4c3fb28b1 --- /dev/null +++ b/selectinf/randomized/tests/test_multitask_lasso.py @@ -0,0 +1,33 @@ +import numpy as np +from ..multitask_lasso import multi_task_lasso + +def main(): + + K = 4 + sample_sizes = (200, 200, 200, 200) + p = 10 + beta = np.random.random((p, K)) + + global_sparsity_rate = .90 + task_sparsity_rate = .50 + global_zeros = np.random.choice(p,int(round(global_sparsity_rate*p))) + + beta[global_zeros,:] = np.zeros((K,)) + for i in np.delete(range(p),global_zeros): + beta[i,np.random.choice(K,int(round(task_sparsity_rate * K)))] = 0. + print("beta ", beta) + + predictor_vars = {i: np.random.random((sample_sizes[i], p)) for i in range(K)} + response_vars = {i: np.dot(predictor_vars[i], beta[:, i]) for i in range(K)} + feature_weight = 1.25 * np.ones(p) + randomizer_scales = np.ones(K) + + multi_lasso = multi_task_lasso.gaussian(predictor_vars, + response_vars, + feature_weight, + randomizer_scales = randomizer_scales) + + print(multi_lasso.multitasking_solver()) + +if __name__ == "__main__": + main() \ No newline at end of file From f938b70edf6d38366bfb446c96a873b57c8a51c7 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Fri, 24 Jul 2020 14:52:34 -0400 Subject: [PATCH 03/26] added instance for multitask --- selectinf/randomized/multitask_lasso.py | 8 ++- .../randomized/tests/test_multitask_lasso.py | 35 +++++++++- selectinf/tests/instance.py | 69 +++++++++++++++++++ 3 files changed, 109 insertions(+), 3 deletions(-) diff --git a/selectinf/randomized/multitask_lasso.py b/selectinf/randomized/multitask_lasso.py index 9ca858514..8858097de 100644 --- a/selectinf/randomized/multitask_lasso.py +++ b/selectinf/randomized/multitask_lasso.py @@ -53,14 +53,15 @@ def _solve_randomized_problem(self, return initial_solns, initial_subgrads - def multitasking_solver(self, num_iter=50): + def multitasking_solver(self, num_iter=100, min_its=20, atol=1.e-8): penalty_prev = rr.weighted_l1norm(self.feature_weight, lagrange=1.) solution_prev = self._solve_randomized_problem(penalty= penalty_prev) beta = solution_prev[0].T - for iteration in range(num_iter): + for itercount in range(num_iter): + beta_prev = beta.copy() sum_all_tasks = np.sum(np.absolute(beta), axis=1) penalty_weight = 1. / np.maximum(np.sqrt(sum_all_tasks), 10 ** -10) @@ -72,6 +73,9 @@ def multitasking_solver(self, num_iter=50): beta = solution_current[0].T + if np.all(np.fabs(beta_prev[0].T - beta)) < atol and itercount >= min_its: + break + subgrad = solution_current[1].T return beta, subgrad diff --git a/selectinf/randomized/tests/test_multitask_lasso.py b/selectinf/randomized/tests/test_multitask_lasso.py index 4c3fb28b1..0aca1811d 100644 --- a/selectinf/randomized/tests/test_multitask_lasso.py +++ b/selectinf/randomized/tests/test_multitask_lasso.py @@ -1,5 +1,6 @@ import numpy as np from ..multitask_lasso import multi_task_lasso +from ...tests.instance import gaussian_multitask_instance def main(): @@ -29,5 +30,37 @@ def main(): print(multi_lasso.multitasking_solver()) + +def test_multitask_lasso(ntask=5, + nsamples=500 * np.ones(5), + p=100, + global_sparsity=.9, + task_sparsity=.5, + sigma=1.*np.ones(5), + signal=np.array([0.3,5.]), + rhos=0.*np.ones(5), + weight=1.): + + nsamples = nsamples.astype(int) + response_vars, predictor_vars, beta = gaussian_multitask_instance(ntask, + nsamples, + p, + global_sparsity, + task_sparsity, + sigma, + signal, + rhos)[:3] + + feature_weight = weight * np.ones(p) + randomizer_scales = np.ones(ntask) + + multi_lasso = multi_task_lasso.gaussian(predictor_vars, + response_vars, + feature_weight, + randomizer_scales = randomizer_scales) + + print('True Beta',beta) + print('Multi-task Solution', multi_lasso.multitasking_solver()[0]) + if __name__ == "__main__": - main() \ No newline at end of file + test_multitask_lasso() \ No newline at end of file diff --git a/selectinf/tests/instance.py b/selectinf/tests/instance.py index 15826a148..77046852a 100644 --- a/selectinf/tests/instance.py +++ b/selectinf/tests/instance.py @@ -373,3 +373,72 @@ def HIV_NRTI(drug='3TC', Y -= Y.mean() X_NRTI -= X_NRTI.mean(0)[None, :]; X_NRTI /= X_NRTI.std(0)[None,:] return X_NRTI, Y, np.array(NRTI_muts) + + +def gaussian_multitask_instance(ntask, + nsamples, + p, + global_sparsity, + task_sparsity, + sigma, + signal, + rhos, #list of correlation parameters + random_signs=False, + df=np.inf, + scale=True, + center=True, + equicorrelated=True): + + + predictor_vars = {i: _design(nsamples[i], p, rhos[i], equicorrelated)[0] for i in range(ntask)} + cov_matrices = [_design(nsamples[i], p, rhos[i], equicorrelated)[1] for i in range(ntask)] + + if center: + predictor_vars = {i: predictor_vars[i]-predictor_vars[i].mean(0)[None, :] for i in range(ntask)} + + signal = np.atleast_1d(signal) + + if signal.shape == (1,): + beta = float(signal[0]) * np.ones((p,ntask)) + global_nulls = np.random.choice(p, int(round(global_sparsity * p))) + beta[global_nulls, :] = np.zeros((ntask,)) + for i in np.delete(range(p), global_nulls): + beta[i, np.random.choice(ntask, int(round(task_sparsity * ntask)))] = 0. + + else: + beta = np.ones((p,ntask)) + nsignal = int(round(global_sparsity * p)) + global_nulls = np.random.choice(p, nsignal, replace=False) + beta[global_nulls, :] = np.zeros((ntask,)) + for j in range(ntask): + non_nulls = np.setdiff1d(np.arange(p), global_nulls) + beta[non_nulls, j] = np.linspace(float(signal[0]), float(signal[1]), num=p-nsignal) + + for i in np.delete(range(p), global_nulls): + beta[i, np.random.choice(ntask, int(round(task_sparsity * ntask)))] = 0. + + if random_signs: + beta *= (2 * np.random.binomial(1, 0.5, size=(p,ntask)) - 1.) + + beta /= [np.sqrt(nsamples[i]) for i in range(len(nsamples))] + + if scale: + scalings = {i: predictor_vars[i].std(0) * np.sqrt(nsamples[i]) for i in range(ntask)} + predictor_vars = {i: predictor_vars[i]/scalings[i][None, :] for i in range(ntask)} + beta *= np.sqrt(np.asarray(nsamples)) + cov_matrices = {i: cov_matrices[i] / np.multiply.outer(scalings[i], scalings[i]) for i in range(ntask)} + + active = np.zeros((p, ntask), np.bool) + active[beta != 0] = True + + # noise model + def _noise(n, df=np.inf): + if df == np.inf: + return np.random.standard_normal(n) + else: + sd_t = np.std(tdist.rvs(df, size=50000)) + return tdist.rvs(df, size=n) / sd_t + + response_vars = {i: (predictor_vars[i].dot(beta[:,i]) + _noise(nsamples[i], df)) * sigma[i] for i in range(ntask)} + + return response_vars, predictor_vars, beta * sigma, np.nonzero(active), sigma, cov_matrices \ No newline at end of file From 5a734d0805496cfd7f8a6f19d6a8008066ec5d62 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Sun, 26 Jul 2020 19:14:16 -0400 Subject: [PATCH 04/26] added fit objects to multi task solver --- selectinf/randomized/multitask_lasso.py | 157 ++++++++++++++---- .../randomized/tests/test_multitask_lasso.py | 11 +- 2 files changed, 133 insertions(+), 35 deletions(-) diff --git a/selectinf/randomized/multitask_lasso.py b/selectinf/randomized/multitask_lasso.py index 8858097de..852bf4253 100644 --- a/selectinf/randomized/multitask_lasso.py +++ b/selectinf/randomized/multitask_lasso.py @@ -3,39 +3,128 @@ import regreg.api as rr from .query import gaussian_query from .randomization import randomization +from ..base import restricted_estimator class multi_task_lasso(gaussian_query): - def __init__(self, - loglikes, - feature_weight, - ridge_terms, - randomizers, - nfeature, - ntask, - perturbations=None): + def __init__(self, + loglikes, + feature_weight, + ridge_terms, + randomizers, + nfeature, + ntask, + perturbations=None): - self.loglikes = loglikes + self.loglikes = loglikes - self.ntask = ntask - self.nfeature = nfeature + self.ntask = ntask + self.nfeature = nfeature - self.ridge_terms = ridge_terms - self.feature_weight = feature_weight + self.ridge_terms = ridge_terms + self.feature_weight = feature_weight - self._initial_omega = perturbations - self.randomizers = randomizers + self._initial_omega = perturbations + self.randomizers = randomizers - def _solve_randomized_problem(self, - penalty, - perturbations=None, - solve_args={'tol': 1.e-12, 'min_its': 50}): - if perturbations is not None: - self._initial_omega = perturbations + def fit(self, + perturbations=None, + solve_args={'tol': 1.e-12, 'min_its': 50}): - if self._initial_omega is None: - self._initial_omega = np.array([self.randomizers[i].sample() for i in range(self.ntask)]).T + p = self.nfeature + + (self.initial_solns, + self.initial_subgrads) = self._solve_multitasking_problem(perturbations=perturbations) + + active_signs = np.array([np.sign(self.initial_solns[:, i]) for i in range(self.ntask)]).T + active = self._active = np.array([(active_signs[:, i] != 0) for i in range(self.ntask)]).T + + self._overall = {i: (active[:, i] > 0) for i in range(self.ntask)} + self._inactive = inactive = {i: ~self._overall[i] for i in range(self.ntask)} + _seltasks = np.array([active[j,:].sum() for j in range(p)]) + self._seltasks = _seltasks[_seltasks>0] + print("number of selected tasks across variables ", self._seltasks) + + _active_signs = active_signs.copy() + ordered_variables = {i: np.nonzero(active[:, i])[0] for i in range(self.ntask)} + + self.selection_variable = {'sign': _active_signs, + 'variables': ordered_variables} + + initial_scalings = {i: np.fabs((self.initial_solns[:, i])[active[:, i]]) for i in range(self.ntask)} + self.observed_opt_state = initial_scalings + + _beta_unpenalized = {i: restricted_estimator(self.loglikes[i], + self._overall[i], + solve_args=solve_args) for i in range(self.ntask)} + + def signed_basis_vector(p, j, s): + v = np.zeros(p) + v[j] = s + return v + + beta_bar = np.zeros((p, self.ntask)) + num_opt_var = np.array([self.observed_opt_state[i].shape[0] for i in range(self.ntask)]) + + _score_linear_term = {} + observed_score_state = {} + opt_linear = {} + opt_offset = {i: self.initial_subgrads[:, i] for i in range(self.ntask)} + + for j in range(self.ntask): + beta_bar[self._overall[j], j] = _beta_unpenalized[j] + + X, y = self.loglikes[j].data + W = self._W = self.loglikes[j].saturated_loss.hessian(X.dot(beta_bar[:,j])) + + _hessian_active = np.dot(X.T, X[:, active[:, j]] * W[:, None]) + _score_linear_term[j] = _hessian_active + + _observed_score_state = _hessian_active.dot(_beta_unpenalized[j]) + _observed_score_state[inactive[j]] += self.loglikes[j].smooth_objective(beta_bar[:,j], 'grad')[inactive[j]] + observed_score_state[j] = _observed_score_state + + active_directions = np.array([signed_basis_vector(p, + k, + (active_signs[:,j])[k]) + for k in np.nonzero(active[:,j])[0]]).T + + _opt_linear = np.zeros((p, num_opt_var[j])) + scaling_slice = slice(0, active[:,j].sum()) + if np.sum(active[:,j]) == 0: + _opt_hessian = 0 + else: + _opt_hessian = (_hessian_active * (active_signs[:,j])[None, active[:,j]] + + self.ridge_terms[j] * active_directions) + + _opt_linear[:, scaling_slice] = _opt_hessian + opt_linear[j] = _opt_linear + + self._beta_full = beta_bar + + ###permuting the optimization variables + ##Pi o = o' + + Pi = np.zeros(num_opt_var.sum()) + indx = 0 + for irow, icol in np.ndindex(active.shape): + if active[irow, icol] > 0: + Pi[indx] = active[:, :(icol+1)].sum()- active[irow:, icol].sum() + indx += 1 + Pi= Pi.astype(int) + + print("check permuted indices ", Pi, num_opt_var.sum()) + + for k in range(self.ntask): + X, y = self.loglikes[k].data + print("check K.K.T. map", np.allclose(self._initial_omega[:, k], -X.T.dot(y) + opt_offset[k] + opt_linear[k].dot(self.observed_opt_state[k]))) + + return active_signs + + def _solve_randomized_problem(self, + penalty, + solve_args={'tol': 1.e-12, 'min_its': 50}): quad_list = [rr.identity_quadratic(self.ridge_terms[i], 0, @@ -53,16 +142,23 @@ def _solve_randomized_problem(self, return initial_solns, initial_subgrads - def multitasking_solver(self, num_iter=100, min_its=20, atol=1.e-8): + def _solve_multitasking_problem(self, perturbations=None, num_iter=100, atol=1.e-5): - penalty_prev = rr.weighted_l1norm(self.feature_weight, lagrange=1.) - solution_prev = self._solve_randomized_problem(penalty= penalty_prev) - beta = solution_prev[0].T + if perturbations is not None: + self._initial_omega = perturbations + + if self._initial_omega is None: + self._initial_omega = np.array([self.randomizers[i].sample() for i in range(self.ntask)]).T + + penalty_init = rr.weighted_l1norm(self.feature_weight, lagrange=1.) + solution_init = self._solve_randomized_problem(penalty= penalty_init) + beta = solution_init[0].T for itercount in range(num_iter): beta_prev = beta.copy() sum_all_tasks = np.sum(np.absolute(beta), axis=1) + penalty_weight = 1. / np.maximum(np.sqrt(sum_all_tasks), 10 ** -10) feature_weight_current = self.feature_weight * penalty_weight @@ -73,16 +169,17 @@ def multitasking_solver(self, num_iter=100, min_its=20, atol=1.e-8): beta = solution_current[0].T - if np.all(np.fabs(beta_prev[0].T - beta)) < atol and itercount >= min_its: + if np.sum(np.fabs(beta_prev[0].T - beta)) < atol: break + print("check itercount ", itercount) subgrad = solution_current[1].T return beta, subgrad - @staticmethod - def gaussian(predictor_vars, + @staticmethod + def gaussian(predictor_vars, response_vars, feature_weight, noise_levels=None, diff --git a/selectinf/randomized/tests/test_multitask_lasso.py b/selectinf/randomized/tests/test_multitask_lasso.py index 0aca1811d..d4bc87580 100644 --- a/selectinf/randomized/tests/test_multitask_lasso.py +++ b/selectinf/randomized/tests/test_multitask_lasso.py @@ -28,18 +28,18 @@ def main(): feature_weight, randomizer_scales = randomizer_scales) - print(multi_lasso.multitasking_solver()) + print(multi_lasso.fit()) def test_multitask_lasso(ntask=5, nsamples=500 * np.ones(5), p=100, global_sparsity=.9, - task_sparsity=.5, + task_sparsity=.3, sigma=1.*np.ones(5), signal=np.array([0.3,5.]), rhos=0.*np.ones(5), - weight=1.): + weight=2.): nsamples = nsamples.astype(int) response_vars, predictor_vars, beta = gaussian_multitask_instance(ntask, @@ -58,9 +58,10 @@ def test_multitask_lasso(ntask=5, response_vars, feature_weight, randomizer_scales = randomizer_scales) + multi_lasso.fit() - print('True Beta',beta) - print('Multi-task Solution', multi_lasso.multitasking_solver()[0]) + #print('True Beta',beta) + #print('Multi-task Solution', multi_lasso.fit()) if __name__ == "__main__": test_multitask_lasso() \ No newline at end of file From 75dd87ecc77a5bde5aa374b315eefbd74f25a455 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Mon, 27 Jul 2020 17:12:34 -0400 Subject: [PATCH 05/26] updated fit for multi tasking solver --- selectinf/randomized/multitask_lasso.py | 108 ++++++++++++++++++------ 1 file changed, 82 insertions(+), 26 deletions(-) diff --git a/selectinf/randomized/multitask_lasso.py b/selectinf/randomized/multitask_lasso.py index 852bf4253..ed1636cef 100644 --- a/selectinf/randomized/multitask_lasso.py +++ b/selectinf/randomized/multitask_lasso.py @@ -1,4 +1,6 @@ import numpy as np +from scipy.linalg import block_diag +import collections import regreg.api as rr from .query import gaussian_query @@ -34,38 +36,69 @@ def fit(self, p = self.nfeature + ## solve multitasking problem + (self.initial_solns, self.initial_subgrads) = self._solve_multitasking_problem(perturbations=perturbations) - active_signs = np.array([np.sign(self.initial_solns[:, i]) for i in range(self.ntask)]).T - active = self._active = np.array([(active_signs[:, i] != 0) for i in range(self.ntask)]).T + ##setting up some initial objects to form our K.K.T map which loops over the K regression tasks - self._overall = {i: (active[:, i] > 0) for i in range(self.ntask)} - self._inactive = inactive = {i: ~self._overall[i] for i in range(self.ntask)} - _seltasks = np.array([active[j,:].sum() for j in range(p)]) - self._seltasks = _seltasks[_seltasks>0] - print("number of selected tasks across variables ", self._seltasks) + active_signs = np.zeros((p, self.ntask)) + beta_bar = np.zeros((p, self.ntask)) + + _active = np.zeros((p, self.ntask), np.bool) + + _overall = _inactive = {} + + ordered_variables = {} + initial_scalings = {} + _beta_unpenalized = {} + + for i in range(self.ntask): + + active_signs[:, i] = np.sign(self.initial_solns[:, i]) + _active[:, i] = active_signs[:, i] != 0 + active_ = _active[:, i] + + _overall[i] = _active[:, i] > 0 + overall = _overall[i] + _inactive[i] = ~overall + + ordered_variables[i] = np.nonzero(active_)[0] + + initial_scalings[i] = np.fabs(self.initial_solns[:, i][overall]) + + _beta_unpenalized[i] = restricted_estimator(self.loglikes[i], + overall, + solve_args=solve_args) + + beta_bar[overall, i] = _beta_unpenalized[i] + + active = self._active = _active + self._overall = _overall + self.inactive = _inactive _active_signs = active_signs.copy() - ordered_variables = {i: np.nonzero(active[:, i])[0] for i in range(self.ntask)} + + _seltasks = np.array([active[j,:].sum() for j in range(p)]) + self._seltasks = _seltasks[_seltasks>0] self.selection_variable = {'sign': _active_signs, 'variables': ordered_variables} - initial_scalings = {i: np.fabs((self.initial_solns[:, i])[active[:, i]]) for i in range(self.ntask)} self.observed_opt_state = initial_scalings - _beta_unpenalized = {i: restricted_estimator(self.loglikes[i], - self._overall[i], - solve_args=solve_args) for i in range(self.ntask)} + self._beta_full = beta_bar def signed_basis_vector(p, j, s): v = np.zeros(p) v[j] = s return v - beta_bar = np.zeros((p, self.ntask)) num_opt_var = np.array([self.observed_opt_state[i].shape[0] for i in range(self.ntask)]) + tot_opt_var = num_opt_var.sum() + + ###setting up K.K.T. map at solution _score_linear_term = {} observed_score_state = {} @@ -73,7 +106,6 @@ def signed_basis_vector(p, j, s): opt_offset = {i: self.initial_subgrads[:, i] for i in range(self.ntask)} for j in range(self.ntask): - beta_bar[self._overall[j], j] = _beta_unpenalized[j] X, y = self.loglikes[j].data W = self._W = self.loglikes[j].saturated_loss.hessian(X.dot(beta_bar[:,j])) @@ -82,7 +114,7 @@ def signed_basis_vector(p, j, s): _score_linear_term[j] = _hessian_active _observed_score_state = _hessian_active.dot(_beta_unpenalized[j]) - _observed_score_state[inactive[j]] += self.loglikes[j].smooth_objective(beta_bar[:,j], 'grad')[inactive[j]] + _observed_score_state[self.inactive[j]] += self.loglikes[j].smooth_objective(beta_bar[:,j], 'grad')[self.inactive[j]] observed_score_state[j] = _observed_score_state active_directions = np.array([signed_basis_vector(p, @@ -101,24 +133,49 @@ def signed_basis_vector(p, j, s): _opt_linear[:, scaling_slice] = _opt_hessian opt_linear[j] = _opt_linear - self._beta_full = beta_bar + for k in range(self.ntask): + X, y = self.loglikes[k].data + print("check K.K.T. map", np.allclose(self._initial_omega[:, k], -X.T.dot(y) + opt_offset[k] + opt_linear[k].dot(self.observed_opt_state[k]))) - ###permuting the optimization variables - ##Pi o = o' + ###defining a transformation tying optimization variables across the K regression tasks - Pi = np.zeros(num_opt_var.sum()) + pi = np.zeros(tot_opt_var) indx = 0 for irow, icol in np.ndindex(active.shape): if active[irow, icol] > 0: - Pi[indx] = active[:, :(icol+1)].sum()- active[irow:, icol].sum() + pi[indx] = active[:, :(icol+1)].sum()- active[irow:, icol].sum() indx += 1 - Pi= Pi.astype(int) + pi = pi.astype(int) + Pi = np.take(np.identity(tot_opt_var), pi, axis=0) - print("check permuted indices ", Pi, num_opt_var.sum()) + Psi = np.array([]) + Tau = np.array([]) + _A = np.array([]) - for k in range(self.ntask): - X, y = self.loglikes[k].data - print("check K.K.T. map", np.allclose(self._initial_omega[:, k], -X.T.dot(y) + opt_offset[k] + opt_linear[k].dot(self.observed_opt_state[k]))) + for j in range(self._seltasks.shape[0]): + + a_ = np.zeros(self._seltasks[j]) + a_[-1] = 1 + _A = block_diag(_A, a_) + + B_ = np.identity(self._seltasks[j]) + B_[(self._seltasks[j]-1), :] = np.ones(self._seltasks[j]) + Psi = block_diag(Psi, B_) + + C_ = np.identity(self._seltasks[j]) + C_[(self._seltasks[j] - 1), :] = np.zeros(self._seltasks[j]) + Tau = block_diag(Tau, C_) + + Psi = Psi[1:, :] + + Tau = Tau[1:, :] + _A = _A[1:, :] + Tau = np.delete(Tau, np.array(np.cumsum(self._seltasks) - 1), 0) + Tau = np.vstack((Tau, _A)) + + ##my final tranformation is o_new = Tau.dot(Psi).dot(Pi).dot(o) + + CoV = Tau.dot(Psi).dot(Pi) return active_signs @@ -177,7 +234,6 @@ def _solve_multitasking_problem(self, perturbations=None, num_iter=100, atol=1.e return beta, subgrad - @staticmethod def gaussian(predictor_vars, response_vars, From a359dd36c69735746baf35c3f6c332aea2bf6433 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Tue, 28 Jul 2020 13:41:38 -0400 Subject: [PATCH 06/26] defined K.K.T. map in terms of what I think should be our opt vars --- selectinf/randomized/multitask_lasso.py | 103 +++++++++++------- .../randomized/tests/test_multitask_lasso.py | 2 +- 2 files changed, 65 insertions(+), 40 deletions(-) diff --git a/selectinf/randomized/multitask_lasso.py b/selectinf/randomized/multitask_lasso.py index ed1636cef..f8a40f718 100644 --- a/selectinf/randomized/multitask_lasso.py +++ b/selectinf/randomized/multitask_lasso.py @@ -98,12 +98,57 @@ def signed_basis_vector(p, j, s): num_opt_var = np.array([self.observed_opt_state[i].shape[0] for i in range(self.ntask)]) tot_opt_var = num_opt_var.sum() + ###defining a transformation tying optimization variables across the K regression tasks + + pi = np.zeros(tot_opt_var) + indx = 0 + for irow, icol in np.ndindex(active.shape): + if active[irow, icol] > 0: + pi[indx] = active[:, :(icol + 1)].sum() - active[irow:, icol].sum() + indx += 1 + pi = pi.astype(int) + Pi = np.take(np.identity(tot_opt_var), pi, axis=0) + + Psi = np.array([]) + Tau = np.array([]) + _A = np.array([]) + + for j in range(self._seltasks.shape[0]): + a_ = np.zeros(self._seltasks[j]) + a_[-1] = 1 + _A = block_diag(_A, a_) + + B_ = np.identity(self._seltasks[j]) + B_[(self._seltasks[j] - 1), :] = np.ones(self._seltasks[j]) + Psi = block_diag(Psi, B_) + + C_ = np.identity(self._seltasks[j]) + C_[(self._seltasks[j] - 1), :] = np.zeros(self._seltasks[j]) + Tau = block_diag(Tau, C_) + + Psi = Psi[1:, :] + + Tau = Tau[1:, :] + _A = _A[1:, :] + Tau = np.delete(Tau, np.array(np.cumsum(self._seltasks) - 1), 0) + Tau = np.vstack((Tau, _A)) + + ##my final tranformation is o_new = Tau.dot(Psi).dot(Pi).dot(o) + + CoV = Tau.dot(Psi).dot(Pi) + ###setting up K.K.T. map at solution _score_linear_term = {} observed_score_state = {} opt_linear = {} opt_offset = {i: self.initial_subgrads[:, i] for i in range(self.ntask)} + + omegas = [] + scores = [] + opt_offsets = [] + opt_linears = np.array([]) + observed_opt_states =[] for j in range(self.ntask): @@ -111,9 +156,9 @@ def signed_basis_vector(p, j, s): W = self._W = self.loglikes[j].saturated_loss.hessian(X.dot(beta_bar[:,j])) _hessian_active = np.dot(X.T, X[:, active[:, j]] * W[:, None]) - _score_linear_term[j] = _hessian_active + _score_linear_term[j] = -_hessian_active - _observed_score_state = _hessian_active.dot(_beta_unpenalized[j]) + _observed_score_state = _score_linear_term[j].dot(_beta_unpenalized[j]) _observed_score_state[self.inactive[j]] += self.loglikes[j].smooth_objective(beta_bar[:,j], 'grad')[self.inactive[j]] observed_score_state[j] = _observed_score_state @@ -133,49 +178,29 @@ def signed_basis_vector(p, j, s): _opt_linear[:, scaling_slice] = _opt_hessian opt_linear[j] = _opt_linear - for k in range(self.ntask): - X, y = self.loglikes[k].data - print("check K.K.T. map", np.allclose(self._initial_omega[:, k], -X.T.dot(y) + opt_offset[k] + opt_linear[k].dot(self.observed_opt_state[k]))) - - ###defining a transformation tying optimization variables across the K regression tasks - - pi = np.zeros(tot_opt_var) - indx = 0 - for irow, icol in np.ndindex(active.shape): - if active[irow, icol] > 0: - pi[indx] = active[:, :(icol+1)].sum()- active[irow:, icol].sum() - indx += 1 - pi = pi.astype(int) - Pi = np.take(np.identity(tot_opt_var), pi, axis=0) - - Psi = np.array([]) - Tau = np.array([]) - _A = np.array([]) - - for j in range(self._seltasks.shape[0]): + omegas.append(self._initial_omega[:, j]) + scores.append(observed_score_state[j]) + opt_offsets.append(opt_offset[j]) + observed_opt_states.extend(self.observed_opt_state[j]) + opt_linears = block_diag(opt_linears, _opt_linear) - a_ = np.zeros(self._seltasks[j]) - a_[-1] = 1 - _A = block_diag(_A, a_) + opt_linears = opt_linears[1:, :] + omegas = np.ravel(np.asarray(omegas)) + scores = np.ravel(np.asarray(scores)) + opt_offsets = np.ravel(np.asarray(opt_offsets)) + observed_opt_states = np.asarray(observed_opt_states) - B_ = np.identity(self._seltasks[j]) - B_[(self._seltasks[j]-1), :] = np.ones(self._seltasks[j]) - Psi = block_diag(Psi, B_) + opt_linears = opt_linears.dot(np.linalg.inv(CoV)) + observed_opt_states = CoV.dot(observed_opt_states) - C_ = np.identity(self._seltasks[j]) - C_[(self._seltasks[j] - 1), :] = np.zeros(self._seltasks[j]) - Tau = block_diag(Tau, C_) + opt_vars = tot_opt_var - self._seltasks.shape[0] - Psi = Psi[1:, :] - - Tau = Tau[1:, :] - _A = _A[1:, :] - Tau = np.delete(Tau, np.array(np.cumsum(self._seltasks) - 1), 0) - Tau = np.vstack((Tau, _A)) + opt_linear_ = opt_linears[:, :opt_vars] - ##my final tranformation is o_new = Tau.dot(Psi).dot(Pi).dot(o) + opt_offset_ = opt_offsets + opt_linears[:, opt_vars:].dot(observed_opt_states[opt_vars:]) - CoV = Tau.dot(Psi).dot(Pi) + ##check K.K.T map + print("check K.K.T. map", np.allclose(omegas, scores + opt_linear_.dot(observed_opt_states[:opt_vars])+ opt_offset_)) return active_signs diff --git a/selectinf/randomized/tests/test_multitask_lasso.py b/selectinf/randomized/tests/test_multitask_lasso.py index d4bc87580..6fbea113a 100644 --- a/selectinf/randomized/tests/test_multitask_lasso.py +++ b/selectinf/randomized/tests/test_multitask_lasso.py @@ -35,7 +35,7 @@ def test_multitask_lasso(ntask=5, nsamples=500 * np.ones(5), p=100, global_sparsity=.9, - task_sparsity=.3, + task_sparsity=.6, sigma=1.*np.ones(5), signal=np.array([0.3,5.]), rhos=0.*np.ones(5), From 0291884aff2a9d43c36a44175612bab48d1e6c20 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Tue, 28 Jul 2020 15:11:46 -0400 Subject: [PATCH 07/26] completed fit --- selectinf/randomized/multitask_lasso.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/selectinf/randomized/multitask_lasso.py b/selectinf/randomized/multitask_lasso.py index f8a40f718..893cae5b4 100644 --- a/selectinf/randomized/multitask_lasso.py +++ b/selectinf/randomized/multitask_lasso.py @@ -200,8 +200,16 @@ def signed_basis_vector(p, j, s): opt_offset_ = opt_offsets + opt_linears[:, opt_vars:].dot(observed_opt_states[opt_vars:]) ##check K.K.T map + print("check K.K.T. map", np.allclose(omegas, scores + opt_linear_.dot(observed_opt_states[:opt_vars])+ opt_offset_)) + ##forming linear constraints on our optimization variables + + A_scaling = -np.identity(opt_vars) + b_scaling = np.zeros(opt_vars) + + print("check signs of observed opt_states ", ((A_scaling.dot(observed_opt_states[:opt_vars])-b_scaling)<0).sum(), opt_vars) + return active_signs def _solve_randomized_problem(self, From 273844bbbfcff918f12e4b20f6b5853ff72ef9d9 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Tue, 28 Jul 2020 15:19:59 -0400 Subject: [PATCH 08/26] renamed some more objects --- selectinf/randomized/multitask_lasso.py | 32 +++++++++++++------------ 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/selectinf/randomized/multitask_lasso.py b/selectinf/randomized/multitask_lasso.py index 893cae5b4..64a57709d 100644 --- a/selectinf/randomized/multitask_lasso.py +++ b/selectinf/randomized/multitask_lasso.py @@ -146,9 +146,9 @@ def signed_basis_vector(p, j, s): omegas = [] scores = [] - opt_offsets = [] - opt_linears = np.array([]) - observed_opt_states =[] + opt_offsets_ = [] + opt_linears_ = np.array([]) + observed_opt_states_ =[] for j in range(self.ntask): @@ -180,35 +180,37 @@ def signed_basis_vector(p, j, s): omegas.append(self._initial_omega[:, j]) scores.append(observed_score_state[j]) - opt_offsets.append(opt_offset[j]) - observed_opt_states.extend(self.observed_opt_state[j]) - opt_linears = block_diag(opt_linears, _opt_linear) + opt_offsets_.append(opt_offset[j]) + observed_opt_states_.extend(self.observed_opt_state[j]) + opt_linears_ = block_diag(opt_linears_, _opt_linear) - opt_linears = opt_linears[1:, :] + opt_linears_ = opt_linears_[1:, :] omegas = np.ravel(np.asarray(omegas)) scores = np.ravel(np.asarray(scores)) - opt_offsets = np.ravel(np.asarray(opt_offsets)) - observed_opt_states = np.asarray(observed_opt_states) + opt_offsets_ = np.ravel(np.asarray(opt_offsets_)) + observed_opt_states_ = np.asarray(observed_opt_states_) - opt_linears = opt_linears.dot(np.linalg.inv(CoV)) - observed_opt_states = CoV.dot(observed_opt_states) + opt_linears_ = opt_linears_.dot(np.linalg.inv(CoV)) + observed_opt_states_ = CoV.dot(observed_opt_states_) opt_vars = tot_opt_var - self._seltasks.shape[0] - opt_linear_ = opt_linears[:, :opt_vars] + opt_linears = opt_linears_[:, :opt_vars] - opt_offset_ = opt_offsets + opt_linears[:, opt_vars:].dot(observed_opt_states[opt_vars:]) + opt_offsets = opt_offsets_ + opt_linears_[:, opt_vars:].dot(observed_opt_states_[opt_vars:]) + + observed_opt_states = observed_opt_states_[:opt_vars] ##check K.K.T map - print("check K.K.T. map", np.allclose(omegas, scores + opt_linear_.dot(observed_opt_states[:opt_vars])+ opt_offset_)) + print("check K.K.T. map", np.allclose(omegas, scores + opt_linears.dot(observed_opt_states)+ opt_offsets)) ##forming linear constraints on our optimization variables A_scaling = -np.identity(opt_vars) b_scaling = np.zeros(opt_vars) - print("check signs of observed opt_states ", ((A_scaling.dot(observed_opt_states[:opt_vars])-b_scaling)<0).sum(), opt_vars) + print("check signs of observed opt_states ", ((A_scaling.dot(observed_opt_states)-b_scaling)<0).sum(), opt_vars) return active_signs From dd145c8faafdf249038ac27cc03724b762b0cedf Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Wed, 29 Jul 2020 02:05:55 -0400 Subject: [PATCH 09/26] added mle based inference --- selectinf/randomized/multitask_lasso.py | 204 +++++++++++++++++- selectinf/randomized/query.py | 2 +- .../randomized/tests/test_multitask_lasso.py | 1 + .../tests/test_selective_MLE_high.py | 2 +- 4 files changed, 200 insertions(+), 9 deletions(-) diff --git a/selectinf/randomized/multitask_lasso.py b/selectinf/randomized/multitask_lasso.py index 64a57709d..29ccf6720 100644 --- a/selectinf/randomized/multitask_lasso.py +++ b/selectinf/randomized/multitask_lasso.py @@ -1,12 +1,13 @@ import numpy as np from scipy.linalg import block_diag -import collections +from scipy.stats import norm as ndist import regreg.api as rr from .query import gaussian_query from .randomization import randomization from ..base import restricted_estimator + class multi_task_lasso(gaussian_query): def __init__(self, @@ -29,7 +30,6 @@ def __init__(self, self._initial_omega = perturbations self.randomizers = randomizers - def fit(self, perturbations=None, solve_args={'tol': 1.e-12, 'min_its': 50}): @@ -44,7 +44,7 @@ def fit(self, ##setting up some initial objects to form our K.K.T map which loops over the K regression tasks active_signs = np.zeros((p, self.ntask)) - beta_bar = np.zeros((p, self.ntask)) + self.beta_bar = beta_bar = np.zeros((p, self.ntask)) _active = np.zeros((p, self.ntask), np.bool) @@ -153,7 +153,7 @@ def signed_basis_vector(p, j, s): for j in range(self.ntask): X, y = self.loglikes[j].data - W = self._W = self.loglikes[j].saturated_loss.hessian(X.dot(beta_bar[:,j])) + W = self.loglikes[j].saturated_loss.hessian(X.dot(beta_bar[:,j])) _hessian_active = np.dot(X.T, X[:, active[:, j]] * W[:, None]) _score_linear_term[j] = -_hessian_active @@ -207,13 +207,123 @@ def signed_basis_vector(p, j, s): ##forming linear constraints on our optimization variables - A_scaling = -np.identity(opt_vars) - b_scaling = np.zeros(opt_vars) + self.linear_con = -np.identity(opt_vars) + self.offset_con = np.zeros(opt_vars) - print("check signs of observed opt_states ", ((A_scaling.dot(observed_opt_states)-b_scaling)<0).sum(), opt_vars) + self.opt_linears = opt_linears + self.opt_offsets = opt_offsets + self.observed_opt_states = observed_opt_states + self.observed_score_states = scores + + print("check signs of observed opt_states ", ((self.linear_con.dot(observed_opt_states)-self.offset_con)<0).sum(), opt_vars) return active_signs + def _setup_implied_gaussian(self, + dispersion=1): + + precs = np.array([]) + for i in range(self.ntask): + prec = (self.randomizers[i].cov_prec)[1] + prec = prec / dispersion + precs = block_diag(precs, prec * np.identity(self.nfeature)) + precs = precs[1:, :] + + cond_precision = self.opt_linears.T.dot(precs.dot(self.opt_linears)) + cond_cov = np.linalg.inv(cond_precision) + logdens_linear = cond_cov.dot(self.opt_linears.T).dot(precs) + + cond_mean = -logdens_linear.dot(self.observed_score_states + self.opt_offsets) + + return cond_mean, cond_cov, cond_precision, logdens_linear + + def selective_MLE(self, + solve_args={'tol': 1.e-12}, + level=0.9, + dispersion=None): + + cond_mean, cond_cov, cond_precision, logdens_linear = self._setup_implied_gaussian() + + (observed_target, cov_target, cov_target_score) = self.selected_targets(dispersion) + + init_soln = self.observed_opt_states + + linear_part = self.linear_con + offset = self.offset_con + + prec_opt = cond_precision + conjugate_arg = prec_opt.dot(cond_mean) + + solver = solve_barrier_affine_py + + val, soln, hess = solver(conjugate_arg, + prec_opt, + init_soln, + linear_part, + offset, + **solve_args) + + prec_target = np.linalg.inv(cov_target) + target_lin = - logdens_linear.dot(cov_target_score.T.dot(prec_target)) + + final_estimator = observed_target + cov_target.dot(target_lin.T.dot(prec_opt.dot(cond_mean - soln))) + + L = target_lin.T.dot(prec_opt) + observed_info_natural = prec_target + L.dot(target_lin) - L.dot(hess.dot(L.T)) + observed_info_mean = cov_target.dot(observed_info_natural.dot(cov_target)) + + Z_scores = final_estimator / np.sqrt(np.diag(observed_info_mean)) + pvalues = ndist.cdf(Z_scores) + pvalues = 2 * np.minimum(pvalues, 1 - pvalues) + + alpha = 1. - level + quantile = ndist.ppf(1 - alpha / 2.) + intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)), + final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T + + + print("check within MLE ", final_estimator.shape, observed_info_mean.shape) + + return final_estimator, observed_info_mean, Z_scores, pvalues, intervals + + def selected_targets(self, + dispersion=None, + solve_args={'tol': 1.e-12, 'min_its': 50}): + + observed_targets = [] + cov_targets = np.array([]) + crosscov_target_scores = np.array([]) + + for j in range(self.ntask): + + X, y = self.loglikes[j].data + n, p = X.shape + features = self._active[:, j] + W = self.loglikes[j].saturated_loss.hessian(X.dot(self.beta_bar[:, j])) + + Xfeat = X[:, features] + Qfeat = Xfeat.T.dot(W[:, None] * Xfeat) + + observed_target = restricted_estimator(self.loglikes[j], features, solve_args=solve_args) + cov_target = np.linalg.inv(Qfeat) + _score_linear = -Xfeat.T.dot(W[:, None] * X).T + + crosscov_target_score = _score_linear.dot(cov_target) + + if dispersion is None: # use Pearson's X^2 + dispersion = ((y - self.loglikes[j].saturated_loss.mean_function(Xfeat.dot(observed_target))) ** 2 / W).sum() / (n - Xfeat.shape[1]) + + observed_targets.extend(observed_target) + crosscov_target_scores = block_diag(crosscov_target_scores, crosscov_target_score.T * dispersion) + cov_targets = block_diag(cov_targets, cov_target * dispersion) + + print("check final shapes of inferential objects ", np.asarray(observed_targets).shape, + cov_targets[1:, :].shape, crosscov_target_scores[1:, :].shape) + + return (np.asarray(observed_targets), + cov_targets[1:, :], + crosscov_target_scores[1:, :]) + def _solve_randomized_problem(self, penalty, solve_args={'tol': 1.e-12, 'min_its': 50}): @@ -310,6 +420,86 @@ def gaussian(predictor_vars, nfeature, ntask) +def solve_barrier_affine_py(conjugate_arg, + precision, + feasible_point, + con_linear, + con_offset, + step=1, + nstep=1000, + min_its=200, + tol=1.e-12): + scaling = np.sqrt(np.diag(con_linear.dot(precision).dot(con_linear.T))) + + if feasible_point is None: + feasible_point = 1. / scaling + + objective = lambda u: -u.T.dot(conjugate_arg) + u.T.dot(precision).dot(u) / 2. \ + + np.log(1. + 1. / ((con_offset - con_linear.dot(u)) / scaling)).sum() + grad = lambda u: -conjugate_arg + precision.dot(u) - con_linear.T.dot( + 1. / (scaling + con_offset - con_linear.dot(u)) - + 1. / (con_offset - con_linear.dot(u))) + barrier_hessian = lambda u: con_linear.T.dot(np.diag(-1. / ((scaling + con_offset - con_linear.dot(u)) ** 2.) + + 1. / ((con_offset - con_linear.dot(u)) ** 2.))).dot( + con_linear) + + current = feasible_point + current_value = np.inf + + for itercount in range(nstep): + cur_grad = grad(current) + + # make sure proposal is feasible + + count = 0 + while True: + count += 1 + proposal = current - step * cur_grad + if np.all(con_offset - con_linear.dot(proposal) > 0): + break + step *= 0.5 + if count >= 40: + raise ValueError('not finding a feasible point') + + # make sure proposal is a descent + + count = 0 + while True: + count += 1 + proposal = current - step * cur_grad + proposed_value = objective(proposal) + if proposed_value <= current_value: + break + step *= 0.5 + if count >= 20: + if not (np.isnan(proposed_value) or np.isnan(current_value)): + break + else: + raise ValueError('value is NaN: %f, %f' % (proposed_value, current_value)) + + # stop if relative decrease is small + + if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value) and itercount >= min_its: + current = proposal + current_value = proposed_value + break + + current = proposal + current_value = proposed_value + + if itercount % 4 == 0: + step *= 2 + + hess = np.linalg.inv(precision + barrier_hessian(current)) + return current_value, current, hess + + + + + + + + diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index df890a2ef..7284b55f6 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -173,7 +173,7 @@ def _setup_implied_gaussian(self, # for covariance of randomization dispersion=1): - _, prec = self.randomizer.cov_prec + _, prec = self.randomizer.cov_prec prec = prec / dispersion if np.asarray(prec).shape in [(), (0,)]: diff --git a/selectinf/randomized/tests/test_multitask_lasso.py b/selectinf/randomized/tests/test_multitask_lasso.py index 6fbea113a..6264be88a 100644 --- a/selectinf/randomized/tests/test_multitask_lasso.py +++ b/selectinf/randomized/tests/test_multitask_lasso.py @@ -59,6 +59,7 @@ def test_multitask_lasso(ntask=5, feature_weight, randomizer_scales = randomizer_scales) multi_lasso.fit() + multi_lasso.selective_MLE() #print('True Beta',beta) #print('Multi-task Solution', multi_lasso.fit()) diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index 578ae66ec..4770dc450 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -228,4 +228,4 @@ def main(nsim=500): print(np.mean(cover), 'coverage so far ') if __name__ == "__main__": - main(nsim=500) + main(nsim=1) From f5af7bf55688e2be1c6026a6d7661d3b8d6d80a6 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Wed, 29 Jul 2020 15:30:16 -0400 Subject: [PATCH 10/26] added all elements of new class --- selectinf/randomized/multitask_lasso.py | 18 +++--- .../randomized/tests/test_multitask_lasso.py | 57 +++++++++++++++---- 2 files changed, 57 insertions(+), 18 deletions(-) diff --git a/selectinf/randomized/multitask_lasso.py b/selectinf/randomized/multitask_lasso.py index 29ccf6720..4261d43b3 100644 --- a/selectinf/randomized/multitask_lasso.py +++ b/selectinf/randomized/multitask_lasso.py @@ -219,13 +219,12 @@ def signed_basis_vector(p, j, s): return active_signs - def _setup_implied_gaussian(self, - dispersion=1): + def _setup_implied_gaussian(self): precs = np.array([]) for i in range(self.ntask): prec = (self.randomizers[i].cov_prec)[1] - prec = prec / dispersion + prec = prec / self.dispersions[i] precs = block_diag(precs, prec * np.identity(self.nfeature)) precs = precs[1:, :] @@ -242,11 +241,12 @@ def selective_MLE(self, level=0.9, dispersion=None): - cond_mean, cond_cov, cond_precision, logdens_linear = self._setup_implied_gaussian() + observed_target, cov_target, cov_target_score = self.selected_targets(dispersion) - (observed_target, cov_target, cov_target_score) = self.selected_targets(dispersion) + cond_mean, cond_cov, cond_precision, logdens_linear = self._setup_implied_gaussian() init_soln = self.observed_opt_states + print("check init_soln ", init_soln) linear_part = self.linear_con offset = self.offset_con @@ -293,6 +293,7 @@ def selected_targets(self, observed_targets = [] cov_targets = np.array([]) crosscov_target_scores = np.array([]) + dispersions = np.zeros(self.ntask) for j in range(self.ntask): @@ -313,6 +314,7 @@ def selected_targets(self, if dispersion is None: # use Pearson's X^2 dispersion = ((y - self.loglikes[j].saturated_loss.mean_function(Xfeat.dot(observed_target))) ** 2 / W).sum() / (n - Xfeat.shape[1]) + dispersions[j] = dispersion observed_targets.extend(observed_target) crosscov_target_scores = block_diag(crosscov_target_scores, crosscov_target_score.T * dispersion) cov_targets = block_diag(cov_targets, cov_target * dispersion) @@ -320,6 +322,8 @@ def selected_targets(self, print("check final shapes of inferential objects ", np.asarray(observed_targets).shape, cov_targets[1:, :].shape, crosscov_target_scores[1:, :].shape) + self.dispersions = dispersions + return (np.asarray(observed_targets), cov_targets[1:, :], crosscov_target_scores[1:, :]) @@ -426,8 +430,8 @@ def solve_barrier_affine_py(conjugate_arg, con_linear, con_offset, step=1, - nstep=1000, - min_its=200, + nstep=2000, + min_its=1000, tol=1.e-12): scaling = np.sqrt(np.diag(con_linear.dot(precision).dot(con_linear.T))) diff --git a/selectinf/randomized/tests/test_multitask_lasso.py b/selectinf/randomized/tests/test_multitask_lasso.py index 6264be88a..61cd82ac8 100644 --- a/selectinf/randomized/tests/test_multitask_lasso.py +++ b/selectinf/randomized/tests/test_multitask_lasso.py @@ -1,4 +1,6 @@ import numpy as np +from scipy.stats import norm as ndist + from ..multitask_lasso import multi_task_lasso from ...tests.instance import gaussian_multitask_instance @@ -31,17 +33,18 @@ def main(): print(multi_lasso.fit()) -def test_multitask_lasso(ntask=5, - nsamples=500 * np.ones(5), +def test_multitask_lasso(ntask=2, + nsamples=500 * np.ones(2), p=100, - global_sparsity=.9, - task_sparsity=.6, - sigma=1.*np.ones(5), + global_sparsity=.8, + task_sparsity=.3, + sigma=1.*np.ones(2), signal=np.array([0.3,5.]), - rhos=0.*np.ones(5), + rhos=0.*np.ones(2), weight=2.): nsamples = nsamples.astype(int) + response_vars, predictor_vars, beta = gaussian_multitask_instance(ntask, nsamples, p, @@ -58,11 +61,43 @@ def test_multitask_lasso(ntask=5, response_vars, feature_weight, randomizer_scales = randomizer_scales) - multi_lasso.fit() - multi_lasso.selective_MLE() + active_signs = multi_lasso.fit() + + estimate, observed_info_mean, _, _, intervals = multi_lasso.selective_MLE(dispersion= None) + + beta_target = [] + + for j in range(ntask): + + beta_target.extend(np.linalg.pinv((predictor_vars[j])[:, (active_signs[:, j] != 0)]).dot(predictor_vars[j].dot(beta[:,j]))) + + beta_target = np.asarray(beta_target) + + coverage = (beta_target > intervals[:, 0]) * (beta_target < + intervals[:, 1]) + + return coverage + +def test_coverage(nsim=100): + + cov = [] + + for n in range(nsim): + + coverage = test_multitask_lasso(ntask=2, + nsamples=500 * np.ones(2), + p=100, + global_sparsity=.8, + task_sparsity=.5, + sigma=1.*np.ones(2), + signal=1., + rhos=0.3*np.ones(2), + weight=2.) + + cov.extend(coverage) - #print('True Beta',beta) - #print('Multi-task Solution', multi_lasso.fit()) + print("iteration completed ", n) + print("coverage so far ", np.mean(np.asarray(cov))) if __name__ == "__main__": - test_multitask_lasso() \ No newline at end of file + test_coverage(nsim=50) \ No newline at end of file From 3412854855db27fcb3fe900c02a9bf660cb077e4 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Wed, 29 Jul 2020 16:39:23 -0400 Subject: [PATCH 11/26] some clean up on solver --- selectinf/randomized/multitask_lasso.py | 7 ++++--- selectinf/randomized/tests/test_multitask_lasso.py | 14 +++++++++----- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/selectinf/randomized/multitask_lasso.py b/selectinf/randomized/multitask_lasso.py index 4261d43b3..670f1864f 100644 --- a/selectinf/randomized/multitask_lasso.py +++ b/selectinf/randomized/multitask_lasso.py @@ -348,7 +348,7 @@ def _solve_randomized_problem(self, return initial_solns, initial_subgrads - def _solve_multitasking_problem(self, perturbations=None, num_iter=100, atol=1.e-5): + def _solve_multitasking_problem(self, perturbations=None, num_iter=1000, rtol=1.e-5): if perturbations is not None: self._initial_omega = perturbations @@ -375,10 +375,11 @@ def _solve_multitasking_problem(self, perturbations=None, num_iter=100, atol=1.e beta = solution_current[0].T - if np.sum(np.fabs(beta_prev[0].T - beta)) < atol: + if np.sum(np.fabs(beta_prev - beta)) < rtol * np.sum(np.fabs(beta_prev)): break - print("check itercount ", itercount) + print("check itercount ", itercount, np.sum(np.fabs(beta_prev - beta)), beta_prev.shape, beta.shape) + subgrad = solution_current[1].T return beta, subgrad diff --git a/selectinf/randomized/tests/test_multitask_lasso.py b/selectinf/randomized/tests/test_multitask_lasso.py index 61cd82ac8..1d7c0474d 100644 --- a/selectinf/randomized/tests/test_multitask_lasso.py +++ b/selectinf/randomized/tests/test_multitask_lasso.py @@ -39,12 +39,14 @@ def test_multitask_lasso(ntask=2, global_sparsity=.8, task_sparsity=.3, sigma=1.*np.ones(2), - signal=np.array([0.3,5.]), + signal_fac= 0.5, rhos=0.*np.ones(2), weight=2.): nsamples = nsamples.astype(int) + signal = np.sqrt(signal_fac * 2 * np.log(p)) + response_vars, predictor_vars, beta = gaussian_multitask_instance(ntask, nsamples, p, @@ -54,7 +56,8 @@ def test_multitask_lasso(ntask=2, signal, rhos)[:3] - feature_weight = weight * np.ones(p) + feature_weight = weight * 1. * np.sqrt(2 * np.log(p)) * np.ones(p) + randomizer_scales = np.ones(ntask) multi_lasso = multi_task_lasso.gaussian(predictor_vars, @@ -76,6 +79,7 @@ def test_multitask_lasso(ntask=2, coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) + print("coverage ", coverage) return coverage def test_coverage(nsim=100): @@ -90,9 +94,9 @@ def test_coverage(nsim=100): global_sparsity=.8, task_sparsity=.5, sigma=1.*np.ones(2), - signal=1., + signal_fac=.5, rhos=0.3*np.ones(2), - weight=2.) + weight=1.) cov.extend(coverage) @@ -100,4 +104,4 @@ def test_coverage(nsim=100): print("coverage so far ", np.mean(np.asarray(cov))) if __name__ == "__main__": - test_coverage(nsim=50) \ No newline at end of file + test_coverage(nsim=100) \ No newline at end of file From 4374483c9d10883bc9a41aaaeff4c32f8e5a0815 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Wed, 29 Jul 2020 16:56:41 -0400 Subject: [PATCH 12/26] some more clean up --- selectinf/randomized/multitask_lasso.py | 15 ++++----------- .../randomized/tests/test_multitask_lasso.py | 8 ++++---- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/selectinf/randomized/multitask_lasso.py b/selectinf/randomized/multitask_lasso.py index 670f1864f..686662413 100644 --- a/selectinf/randomized/multitask_lasso.py +++ b/selectinf/randomized/multitask_lasso.py @@ -203,7 +203,7 @@ def signed_basis_vector(p, j, s): ##check K.K.T map - print("check K.K.T. map", np.allclose(omegas, scores + opt_linears.dot(observed_opt_states)+ opt_offsets)) + print("check K.K.T. map", np.allclose(omegas, scores + opt_linears.dot(observed_opt_states)+ opt_offsets, atol=1e-03)) ##forming linear constraints on our optimization variables @@ -246,7 +246,6 @@ def selective_MLE(self, cond_mean, cond_cov, cond_precision, logdens_linear = self._setup_implied_gaussian() init_soln = self.observed_opt_states - print("check init_soln ", init_soln) linear_part = self.linear_con offset = self.offset_con @@ -281,9 +280,6 @@ def selective_MLE(self, intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)), final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T - - print("check within MLE ", final_estimator.shape, observed_info_mean.shape) - return final_estimator, observed_info_mean, Z_scores, pvalues, intervals def selected_targets(self, @@ -319,9 +315,6 @@ def selected_targets(self, crosscov_target_scores = block_diag(crosscov_target_scores, crosscov_target_score.T * dispersion) cov_targets = block_diag(cov_targets, cov_target * dispersion) - print("check final shapes of inferential objects ", np.asarray(observed_targets).shape, - cov_targets[1:, :].shape, crosscov_target_scores[1:, :].shape) - self.dispersions = dispersions return (np.asarray(observed_targets), @@ -348,7 +341,7 @@ def _solve_randomized_problem(self, return initial_solns, initial_subgrads - def _solve_multitasking_problem(self, perturbations=None, num_iter=1000, rtol=1.e-5): + def _solve_multitasking_problem(self, perturbations=None, num_iter=1000, atol=1.e-5): if perturbations is not None: self._initial_omega = perturbations @@ -375,10 +368,10 @@ def _solve_multitasking_problem(self, perturbations=None, num_iter=1000, rtol=1. beta = solution_current[0].T - if np.sum(np.fabs(beta_prev - beta)) < rtol * np.sum(np.fabs(beta_prev)): + if np.sum(np.fabs(beta_prev - beta)) < atol: break - print("check itercount ", itercount, np.sum(np.fabs(beta_prev - beta)), beta_prev.shape, beta.shape) + print("check itercount ", itercount, np.sum(np.fabs(beta_prev - beta))) subgrad = solution_current[1].T diff --git a/selectinf/randomized/tests/test_multitask_lasso.py b/selectinf/randomized/tests/test_multitask_lasso.py index 1d7c0474d..d6b30e80f 100644 --- a/selectinf/randomized/tests/test_multitask_lasso.py +++ b/selectinf/randomized/tests/test_multitask_lasso.py @@ -88,14 +88,14 @@ def test_coverage(nsim=100): for n in range(nsim): - coverage = test_multitask_lasso(ntask=2, - nsamples=500 * np.ones(2), + coverage = test_multitask_lasso(ntask=4, + nsamples=500 * np.ones(4), p=100, global_sparsity=.8, task_sparsity=.5, - sigma=1.*np.ones(2), + sigma=1.*np.ones(4), signal_fac=.5, - rhos=0.3*np.ones(2), + rhos=0.3*np.ones(4), weight=1.) cov.extend(coverage) From ef537e67d83013b2aa9ab4a4c78db605c07e6ead Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Mon, 3 Aug 2020 19:15:19 -0400 Subject: [PATCH 13/26] removed dispersion bug --- selectinf/randomized/multitask_lasso.py | 161 +++++++++--------- .../randomized/tests/test_multitask_lasso.py | 44 +++-- 2 files changed, 113 insertions(+), 92 deletions(-) diff --git a/selectinf/randomized/multitask_lasso.py b/selectinf/randomized/multitask_lasso.py index 686662413..c40a7ed53 100644 --- a/selectinf/randomized/multitask_lasso.py +++ b/selectinf/randomized/multitask_lasso.py @@ -7,7 +7,6 @@ from .randomization import randomization from ..base import restricted_estimator - class multi_task_lasso(gaussian_query): def __init__(self, @@ -44,58 +43,53 @@ def fit(self, ##setting up some initial objects to form our K.K.T map which loops over the K regression tasks active_signs = np.zeros((p, self.ntask)) - self.beta_bar = beta_bar = np.zeros((p, self.ntask)) + beta_bar = np.zeros((p, self.ntask)) _active = np.zeros((p, self.ntask), np.bool) - - _overall = _inactive = {} + _overall = {} + inactive = {} ordered_variables = {} + initial_scalings = {} _beta_unpenalized = {} for i in range(self.ntask): active_signs[:, i] = np.sign(self.initial_solns[:, i]) - _active[:, i] = active_signs[:, i] != 0 - active_ = _active[:, i] + active_ = _active[:, i] = active_signs[:, i] != 0 - _overall[i] = _active[:, i] > 0 - overall = _overall[i] - _inactive[i] = ~overall + overall_ = _overall[i] = _active[:, i] > 0 + inactive[i] = ~overall_ ordered_variables[i] = np.nonzero(active_)[0] - initial_scalings[i] = np.fabs(self.initial_solns[:, i][overall]) + initial_scalings[i] = np.fabs(self.initial_solns[:, i][overall_]) _beta_unpenalized[i] = restricted_estimator(self.loglikes[i], - overall, + overall_, solve_args=solve_args) - beta_bar[overall, i] = _beta_unpenalized[i] + beta_bar[overall_, i] = _beta_unpenalized[i] active = self._active = _active self._overall = _overall - self.inactive = _inactive + self.inactive = inactive - _active_signs = active_signs.copy() + self.beta_bar = beta_bar - _seltasks = np.array([active[j,:].sum() for j in range(p)]) - self._seltasks = _seltasks[_seltasks>0] + seltasks = np.array([active[j,:].sum() for j in range(p)]) + self.seltasks = seltasks[seltasks>0] - self.selection_variable = {'sign': _active_signs, + self.selection_variable = {'sign': active_signs.copy(), 'variables': ordered_variables} - self.observed_opt_state = initial_scalings - - self._beta_full = beta_bar - def signed_basis_vector(p, j, s): v = np.zeros(p) v[j] = s return v - num_opt_var = np.array([self.observed_opt_state[i].shape[0] for i in range(self.ntask)]) + num_opt_var = np.array([initial_scalings[i].shape[0] for i in range(self.ntask)]) tot_opt_var = num_opt_var.sum() ###defining a transformation tying optimization variables across the K regression tasks @@ -106,32 +100,34 @@ def signed_basis_vector(p, j, s): if active[irow, icol] > 0: pi[indx] = active[:, :(icol + 1)].sum() - active[irow:, icol].sum() indx += 1 + pi = pi.astype(int) + Pi = np.take(np.identity(tot_opt_var), pi, axis=0) Psi = np.array([]) Tau = np.array([]) - _A = np.array([]) + A_ = np.array([]) - for j in range(self._seltasks.shape[0]): - a_ = np.zeros(self._seltasks[j]) + for j in range(self.seltasks.shape[0]): + a_ = np.zeros(self.seltasks[j]) a_[-1] = 1 - _A = block_diag(_A, a_) + A_ = block_diag(A_, a_) - B_ = np.identity(self._seltasks[j]) - B_[(self._seltasks[j] - 1), :] = np.ones(self._seltasks[j]) + B_ = np.identity(self.seltasks[j]) + B_[(self.seltasks[j] - 1), :] = np.ones(self.seltasks[j]) Psi = block_diag(Psi, B_) - C_ = np.identity(self._seltasks[j]) - C_[(self._seltasks[j] - 1), :] = np.zeros(self._seltasks[j]) + C_ = np.identity(self.seltasks[j]) + C_[(self.seltasks[j] - 1), :] = np.zeros(self.seltasks[j]) Tau = block_diag(Tau, C_) Psi = Psi[1:, :] Tau = Tau[1:, :] - _A = _A[1:, :] - Tau = np.delete(Tau, np.array(np.cumsum(self._seltasks) - 1), 0) - Tau = np.vstack((Tau, _A)) + A_ = A_[1:, :] + Tau = np.delete(Tau, np.array(np.cumsum(self.seltasks) - 1), 0) + Tau = np.vstack((Tau, A_)) ##my final tranformation is o_new = Tau.dot(Psi).dot(Pi).dot(o) @@ -141,7 +137,6 @@ def signed_basis_vector(p, j, s): _score_linear_term = {} observed_score_state = {} - opt_linear = {} opt_offset = {i: self.initial_subgrads[:, i] for i in range(self.ntask)} omegas = [] @@ -150,50 +145,54 @@ def signed_basis_vector(p, j, s): opt_linears_ = np.array([]) observed_opt_states_ =[] - for j in range(self.ntask): - - X, y = self.loglikes[j].data - W = self.loglikes[j].saturated_loss.hessian(X.dot(beta_bar[:,j])) + for i in range(self.ntask): + X, y = self.loglikes[i].data + W = self.loglikes[i].saturated_loss.hessian(X.dot(beta_bar[:,i])) - _hessian_active = np.dot(X.T, X[:, active[:, j]] * W[:, None]) - _score_linear_term[j] = -_hessian_active + _hessian_active = np.dot(X.T, X[:, active[:, i]] * W[:, None]) + _score_linear_term[i] = -_hessian_active - _observed_score_state = _score_linear_term[j].dot(_beta_unpenalized[j]) - _observed_score_state[self.inactive[j]] += self.loglikes[j].smooth_objective(beta_bar[:,j], 'grad')[self.inactive[j]] - observed_score_state[j] = _observed_score_state + _observed_score_state = _score_linear_term[i].dot(_beta_unpenalized[i]) + _observed_score_state[self.inactive[i]] += self.loglikes[i].smooth_objective(beta_bar[:,i], 'grad')[self.inactive[i]] + observed_score_state[i] = _observed_score_state active_directions = np.array([signed_basis_vector(p, k, - (active_signs[:,j])[k]) - for k in np.nonzero(active[:,j])[0]]).T + (active_signs[:,i])[k]) + for k in np.nonzero(active[:,i])[0]]).T - _opt_linear = np.zeros((p, num_opt_var[j])) - scaling_slice = slice(0, active[:,j].sum()) - if np.sum(active[:,j]) == 0: + _opt_linear = np.zeros((p, num_opt_var[i])) + scaling_slice = slice(0, active[:,i].sum()) + if np.sum(active[:,i]) == 0: _opt_hessian = 0 else: - _opt_hessian = (_hessian_active * (active_signs[:,j])[None, active[:,j]] - + self.ridge_terms[j] * active_directions) + _opt_hessian = (_hessian_active * (active_signs[:,i])[None, active[:,i]] + + self.ridge_terms[i] * active_directions) _opt_linear[:, scaling_slice] = _opt_hessian - opt_linear[j] = _opt_linear - omegas.append(self._initial_omega[:, j]) - scores.append(observed_score_state[j]) - opt_offsets_.append(opt_offset[j]) - observed_opt_states_.extend(self.observed_opt_state[j]) + omegas.append(self._initial_omega[:, i]) + + scores.append(observed_score_state[i]) + + opt_offsets_.append(opt_offset[i]) + + observed_opt_states_.extend(initial_scalings[i]) + opt_linears_ = block_diag(opt_linears_, _opt_linear) opt_linears_ = opt_linears_[1:, :] omegas = np.ravel(np.asarray(omegas)) scores = np.ravel(np.asarray(scores)) opt_offsets_ = np.ravel(np.asarray(opt_offsets_)) + observed_opt_states_ = np.asarray(observed_opt_states_) opt_linears_ = opt_linears_.dot(np.linalg.inv(CoV)) + observed_opt_states_ = CoV.dot(observed_opt_states_) - opt_vars = tot_opt_var - self._seltasks.shape[0] + opt_vars = tot_opt_var - self.seltasks.shape[0] opt_linears = opt_linears_[:, :opt_vars] @@ -223,8 +222,7 @@ def _setup_implied_gaussian(self): precs = np.array([]) for i in range(self.ntask): - prec = (self.randomizers[i].cov_prec)[1] - prec = prec / self.dispersions[i] + _, prec = self.randomizers[i].cov_prec precs = block_diag(precs, prec * np.identity(self.nfeature)) precs = precs[1:, :] @@ -239,9 +237,9 @@ def _setup_implied_gaussian(self): def selective_MLE(self, solve_args={'tol': 1.e-12}, level=0.9, - dispersion=None): + dispersions=None): - observed_target, cov_target, cov_target_score = self.selected_targets(dispersion) + observed_target, cov_target, cov_target_score = self.selected_targets(dispersions) cond_mean, cond_cov, cond_precision, logdens_linear = self._setup_implied_gaussian() @@ -260,10 +258,14 @@ def selective_MLE(self, init_soln, linear_part, offset, - **solve_args) + step=1, + nstep=5000, + min_its=3000, + tol=1.e-12) prec_target = np.linalg.inv(cov_target) - target_lin = - logdens_linear.dot(cov_target_score.T.dot(prec_target)) + + target_lin = -logdens_linear.dot(cov_target_score.T.dot(prec_target)) final_estimator = observed_target + cov_target.dot(target_lin.T.dot(prec_opt.dot(cond_mean - soln))) @@ -283,13 +285,12 @@ def selective_MLE(self, return final_estimator, observed_info_mean, Z_scores, pvalues, intervals def selected_targets(self, - dispersion=None, + dispersions=None, solve_args={'tol': 1.e-12, 'min_its': 50}): observed_targets = [] cov_targets = np.array([]) crosscov_target_scores = np.array([]) - dispersions = np.zeros(self.ntask) for j in range(self.ntask): @@ -307,16 +308,15 @@ def selected_targets(self, crosscov_target_score = _score_linear.dot(cov_target) - if dispersion is None: # use Pearson's X^2 + if dispersions is None: # use Pearson's X^2 dispersion = ((y - self.loglikes[j].saturated_loss.mean_function(Xfeat.dot(observed_target))) ** 2 / W).sum() / (n - Xfeat.shape[1]) + else: + dispersion = dispersions[j] - dispersions[j] = dispersion observed_targets.extend(observed_target) crosscov_target_scores = block_diag(crosscov_target_scores, crosscov_target_score.T * dispersion) cov_targets = block_diag(cov_targets, cov_target * dispersion) - self.dispersions = dispersions - return (np.asarray(observed_targets), cov_targets[1:, :], crosscov_target_scores[1:, :]) @@ -371,7 +371,7 @@ def _solve_multitasking_problem(self, perturbations=None, num_iter=1000, atol=1. if np.sum(np.fabs(beta_prev - beta)) < atol: break - print("check itercount ", itercount, np.sum(np.fabs(beta_prev - beta))) + print("check itercount ", itercount) subgrad = solution_current[1].T @@ -379,19 +379,20 @@ def _solve_multitasking_problem(self, perturbations=None, num_iter=1000, atol=1. @staticmethod def gaussian(predictor_vars, - response_vars, - feature_weight, - noise_levels=None, - quadratic=None, - ridge_term=None, - randomizer_scales=None): + response_vars, + feature_weight, + noise_levels=None, + quadratic=None, + ridge_term=None, + randomizer_scales=None): ntask = len(response_vars) if noise_levels is None: noise_levels = np.ones(ntask) - loglikes = {i: rr.glm.gaussian(predictor_vars[i], response_vars[i], coef=1. / noise_levels[i] ** 2, quadratic=quadratic) for i in range(ntask)} + loglikes = {i: rr.glm.gaussian(predictor_vars[i], response_vars[i], coef=1. / noise_levels[i] ** 2, quadratic=quadratic) + for i in range(ntask)} sample_sizes = np.asarray([predictor_vars[i].shape[0] for i in range(ntask)]) nfeatures = [predictor_vars[i].shape[1] for i in range(ntask)] @@ -403,11 +404,13 @@ def gaussian(predictor_vars, if ridge_term is None: ridge_terms = np.zeros(ntask) + else: + ridge_terms = ridge_term mean_diag_list = [np.mean((predictor_vars[i] ** 2).sum(0)) for i in range(ntask)] if randomizer_scales is None: - randomizer_scales = np.asarray([np.sqrt(mean_diag_list[i]) * 0.5 * np.std(response_vars[i]) - * np.sqrt(sample_sizes[i] / (sample_sizes[i] - 1.)) for i in range(ntask)]) + randomizer_scales = np.asarray([np.sqrt(mean_diag_list[i]) * 0.5 * np.std(response_vars[i]) + * np.sqrt(sample_sizes[i] / (sample_sizes[i] - 1.)) for i in range(ntask)]) randomizers = {i: randomization.isotropic_gaussian((nfeature,), randomizer_scales[i]) for i in range(ntask)} @@ -424,7 +427,7 @@ def solve_barrier_affine_py(conjugate_arg, con_linear, con_offset, step=1, - nstep=2000, + nstep=5000, min_its=1000, tol=1.e-12): scaling = np.sqrt(np.diag(con_linear.dot(precision).dot(con_linear.T))) diff --git a/selectinf/randomized/tests/test_multitask_lasso.py b/selectinf/randomized/tests/test_multitask_lasso.py index d6b30e80f..0d37fd31a 100644 --- a/selectinf/randomized/tests/test_multitask_lasso.py +++ b/selectinf/randomized/tests/test_multitask_lasso.py @@ -58,50 +58,68 @@ def test_multitask_lasso(ntask=2, feature_weight = weight * 1. * np.sqrt(2 * np.log(p)) * np.ones(p) - randomizer_scales = np.ones(ntask) + sigmas_ = np.array([np.std(response_vars[i]) for i in range(ntask)]) + randomizer_scales = 1. * sigmas_ + + #ridge_terms = np.array([np.std(response_vars[i]) * np.sqrt(np.mean((predictor_vars[i] ** 2).sum(0)))/ np.sqrt(nsamples[i] - 1) + # for i in range(ntask)]) + multi_lasso = multi_task_lasso.gaussian(predictor_vars, response_vars, feature_weight, + ridge_term = None, randomizer_scales = randomizer_scales) active_signs = multi_lasso.fit() - estimate, observed_info_mean, _, _, intervals = multi_lasso.selective_MLE(dispersion= None) + dispersions = np.array([np.linalg.norm(response_vars[i] - + predictor_vars[i].dot(np.linalg.pinv(predictor_vars[i]).dot(response_vars[i]))) ** 2 / (nsamples[i] - p) + for i in range(ntask)]) + + estimate, observed_info_mean, _, _, intervals = multi_lasso.selective_MLE(dispersions= dispersions) beta_target = [] for j in range(ntask): - beta_target.extend(np.linalg.pinv((predictor_vars[j])[:, (active_signs[:, j] != 0)]).dot(predictor_vars[j].dot(beta[:,j]))) beta_target = np.asarray(beta_target) + print("check max size ", np.amax(np.fabs(beta_target))) coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) print("coverage ", coverage) - return coverage + if coverage.sum() <= 0.10: + print(intervals, multi_lasso.observed_opt_states, estimate) + return coverage, intervals[:, 1]- intervals[:, 0] def test_coverage(nsim=100): cov = [] + len = [] for n in range(nsim): - coverage = test_multitask_lasso(ntask=4, - nsamples=500 * np.ones(4), - p=100, - global_sparsity=.8, - task_sparsity=.5, - sigma=1.*np.ones(4), - signal_fac=.5, - rhos=0.3*np.ones(4), - weight=1.) + coverage, length = test_multitask_lasso(ntask=4, + nsamples=500 * np.ones(4), + p=100, + global_sparsity=.95, + task_sparsity=0.25, + sigma=1.*np.ones(4), + signal_fac=1.2, + rhos=0.20*np.ones(4), + weight=1.) + + if coverage.sum()<= 0.10: + break cov.extend(coverage) + len.extend(length) print("iteration completed ", n) print("coverage so far ", np.mean(np.asarray(cov))) + print("length so far ", np.mean(np.asarray(length))) if __name__ == "__main__": test_coverage(nsim=100) \ No newline at end of file From 670ad5da3c51a28476c630cd4310327724aea34d Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Tue, 11 Aug 2020 10:53:52 -0400 Subject: [PATCH 14/26] work in progress on mtl --- selectinf/randomized/multitask_lasso.py | 91 ++++++++++++++++++- .../randomized/tests/test_multitask_lasso.py | 21 +++-- 2 files changed, 101 insertions(+), 11 deletions(-) diff --git a/selectinf/randomized/multitask_lasso.py b/selectinf/randomized/multitask_lasso.py index c40a7ed53..ec033d994 100644 --- a/selectinf/randomized/multitask_lasso.py +++ b/selectinf/randomized/multitask_lasso.py @@ -79,6 +79,7 @@ def fit(self, self.beta_bar = beta_bar seltasks = np.array([active[j,:].sum() for j in range(p)]) + self.active_global = np.asarray([j for j in range(p) if seltasks[j]>0]) self.seltasks = seltasks[seltasks>0] self.selection_variable = {'sign': active_signs.copy(), @@ -214,7 +215,7 @@ def signed_basis_vector(p, j, s): self.observed_opt_states = observed_opt_states self.observed_score_states = scores - print("check signs of observed opt_states ", ((self.linear_con.dot(observed_opt_states)-self.offset_con)<0).sum(), opt_vars) + print("check signs of observed opt_states ", ((self.linear_con.dot(observed_opt_states)-self.offset_con)<0).sum(), opt_vars, self.seltasks) return active_signs @@ -284,6 +285,94 @@ def selective_MLE(self, return final_estimator, observed_info_mean, Z_scores, pvalues, intervals + def selective_MLE_mt(self, + solve_args={'tol': 1.e-12}, + level=0.9, + dispersions=None): + + observed_target, cov_target, cov_target_score = self.multitasking_target(dispersions=dispersions) + + cond_mean, cond_cov, cond_precision, logdens_linear = self._setup_implied_gaussian() + + init_soln = self.observed_opt_states + + linear_part = self.linear_con + offset = self.offset_con + + prec_opt = cond_precision + conjugate_arg = prec_opt.dot(cond_mean) + + solver = solve_barrier_affine_py + + val, soln, hess = solver(conjugate_arg, + prec_opt, + init_soln, + linear_part, + offset, + step=1, + nstep=5000, + min_its=3000, + tol=1.e-12) + + prec_target = np.linalg.inv(cov_target) + + target_lin = -logdens_linear.dot(cov_target_score.T.dot(prec_target)) + + final_estimator = observed_target + cov_target.dot(target_lin.T.dot(prec_opt.dot(cond_mean - soln))) + + L = target_lin.T.dot(prec_opt) + observed_info_natural = prec_target + L.dot(target_lin) - L.dot(hess.dot(L.T)) + observed_info_mean = cov_target.dot(observed_info_natural.dot(cov_target)) + + Z_scores = final_estimator / np.sqrt(np.diag(observed_info_mean)) + pvalues = ndist.cdf(Z_scores) + pvalues = 2 * np.minimum(pvalues, 1 - pvalues) + + alpha = 1. - level + quantile = ndist.ppf(1 - alpha / 2.) + intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)), + final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T + + return final_estimator, observed_info_mean, Z_scores, pvalues, intervals + + def multitasking_target(self, + dispersions=None, + solve_args={'tol': 1.e-12, 'min_its': 50}): + + observed_targets = {} + cov_targets = {} + crosscov_target_scores = {} + + features = self.active_global + + for i in range(self.ntask): + + X, y = self.loglikes[i].data + n, p = X.shape + W = self.loglikes[i].saturated_loss.hessian(X.dot(self.beta_bar[:, i])) + + Xfeat = X[:, features] + Qfeat = Xfeat.T.dot(W[:, None] * Xfeat) + + observed_targets[i] = restricted_estimator(self.loglikes[i], features, solve_args=solve_args) + + cov_target = np.linalg.inv(Qfeat) + _score_linear = -Xfeat.T.dot(W[:, None] * X).T + + crosscov_target_score = _score_linear.dot(cov_target) + + if dispersions is None: # use Pearson's X^2 + dispersion = ((y - self.loglikes[i].saturated_loss.mean_function(Xfeat.dot(observed_targets[i]))) ** 2 / W).sum() / (n - Xfeat.shape[1]) + else: + dispersion = dispersions[i] + + crosscov_target_scores[i] = crosscov_target_score.T * dispersion + cov_targets[i] = cov_target * dispersion + + return (observed_targets, + cov_targets, + crosscov_target_scores) + def selected_targets(self, dispersions=None, solve_args={'tol': 1.e-12, 'min_its': 50}): diff --git a/selectinf/randomized/tests/test_multitask_lasso.py b/selectinf/randomized/tests/test_multitask_lasso.py index 0d37fd31a..1e28ba343 100644 --- a/selectinf/randomized/tests/test_multitask_lasso.py +++ b/selectinf/randomized/tests/test_multitask_lasso.py @@ -60,7 +60,7 @@ def test_multitask_lasso(ntask=2, sigmas_ = np.array([np.std(response_vars[i]) for i in range(ntask)]) - randomizer_scales = 1. * sigmas_ + randomizer_scales = 0.71 * sigmas_ #ridge_terms = np.array([np.std(response_vars[i]) * np.sqrt(np.mean((predictor_vars[i] ** 2).sum(0)))/ np.sqrt(nsamples[i] - 1) # for i in range(ntask)]) @@ -84,7 +84,7 @@ def test_multitask_lasso(ntask=2, beta_target.extend(np.linalg.pinv((predictor_vars[j])[:, (active_signs[:, j] != 0)]).dot(predictor_vars[j].dot(beta[:,j]))) beta_target = np.asarray(beta_target) - print("check max size ", np.amax(np.fabs(beta_target))) + print("check size range ", np.amax(np.fabs(beta_target)), np.amin(np.fabs(beta_target))) coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) @@ -101,15 +101,15 @@ def test_coverage(nsim=100): for n in range(nsim): - coverage, length = test_multitask_lasso(ntask=4, - nsamples=500 * np.ones(4), - p=100, + coverage, length = test_multitask_lasso(ntask=5, + nsamples=500 * np.ones(5), + p=50, global_sparsity=.95, - task_sparsity=0.25, - sigma=1.*np.ones(4), - signal_fac=1.2, - rhos=0.20*np.ones(4), - weight=1.) + task_sparsity=0.20, + sigma=1.*np.ones(5), + signal_fac=1., + rhos=0.50*np.ones(5), + weight=1.2) if coverage.sum()<= 0.10: break @@ -122,4 +122,5 @@ def test_coverage(nsim=100): print("length so far ", np.mean(np.asarray(length))) if __name__ == "__main__": + test_coverage(nsim=100) \ No newline at end of file From 1f7da2332fe8e84a18d73734dfab89f244f603d8 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Sun, 16 Aug 2020 17:21:21 -0400 Subject: [PATCH 15/26] lik for multitasking target --- selectinf/randomized/multitask_lasso.py | 172 ++++++------------ .../randomized/tests/test_multitask_lasso.py | 4 + 2 files changed, 57 insertions(+), 119 deletions(-) diff --git a/selectinf/randomized/multitask_lasso.py b/selectinf/randomized/multitask_lasso.py index ec033d994..7d41a356f 100644 --- a/selectinf/randomized/multitask_lasso.py +++ b/selectinf/randomized/multitask_lasso.py @@ -235,113 +235,83 @@ def _setup_implied_gaussian(self): return cond_mean, cond_cov, cond_precision, logdens_linear - def selective_MLE(self, - solve_args={'tol': 1.e-12}, - level=0.9, - dispersions=None): + def _set_marginal_parameters(self, + dispersions= None): - observed_target, cov_target, cov_target_score = self.selected_targets(dispersions) - - cond_mean, cond_cov, cond_precision, logdens_linear = self._setup_implied_gaussian() - - init_soln = self.observed_opt_states - - linear_part = self.linear_con - offset = self.offset_con - - prec_opt = cond_precision - conjugate_arg = prec_opt.dot(cond_mean) - - solver = solve_barrier_affine_py - - val, soln, hess = solver(conjugate_arg, - prec_opt, - init_soln, - linear_part, - offset, - step=1, - nstep=5000, - min_its=3000, - tol=1.e-12) + observed_target, cov_target, cov_target_score = self.multitasking_target(dispersions) + ntarget = observed_target.shape[0] prec_target = np.linalg.inv(cov_target) - target_lin = -logdens_linear.dot(cov_target_score.T.dot(prec_target)) - - final_estimator = observed_target + cov_target.dot(target_lin.T.dot(prec_opt.dot(cond_mean - soln))) + cond_mean, cond_cov, cond_precision, logdens_linear = self._setup_implied_gaussian() + nopt = cond_cov.shape[0] - L = target_lin.T.dot(prec_opt) - observed_info_natural = prec_target + L.dot(target_lin) - L.dot(hess.dot(L.T)) - observed_info_mean = cov_target.dot(observed_info_natural.dot(cov_target)) + target_linear = -logdens_linear.dot(cov_target_score.T.dot(prec_target)) - Z_scores = final_estimator / np.sqrt(np.diag(observed_info_mean)) - pvalues = ndist.cdf(Z_scores) - pvalues = 2 * np.minimum(pvalues, 1 - pvalues) + implied_precision = np.zeros((ntarget + nopt, ntarget + nopt)) + implied_precision[:ntarget][:, :ntarget] = (prec_target + target_linear.T.dot(cond_precision.dot(target_linear))) + implied_precision[:ntarget][:, ntarget:] = -target_linear.T.dot(cond_precision) + implied_precision[ntarget:][:, :ntarget] = (-target_linear.T.dot(cond_precision)).T - alpha = 1. - level - quantile = ndist.ppf(1 - alpha / 2.) - intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)), - final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T + implied_precision[ntarget:][:, ntarget:] = cond_precision - return final_estimator, observed_info_mean, Z_scores, pvalues, intervals + implied_cov = np.linalg.inv(implied_precision) - def selective_MLE_mt(self, - solve_args={'tol': 1.e-12}, - level=0.9, - dispersions=None): + self.linear_coef = implied_cov[ntarget:][:, :ntarget].dot(prec_target) - observed_target, cov_target, cov_target_score = self.multitasking_target(dispersions=dispersions) + target_offset = cond_mean - target_linear.dot(observed_target) + M = implied_cov[ntarget:][:, ntarget:].dot(cond_precision.dot(target_offset)) + N = -target_linear.T.dot(cond_precision).dot(target_offset) - cond_mean, cond_cov, cond_precision, logdens_linear = self._setup_implied_gaussian() + self.offset_coef = implied_cov[ntarget:][:, :ntarget].dot(N) + M - init_soln = self.observed_opt_states + self.cov_marginal = implied_cov[ntarget:][:, ntarget:] + self.prec_marginal = np.linalg.inv(self.cov_marginal) - linear_part = self.linear_con - offset = self.offset_con + def multitasking_likelihood(self, + target_parameter): - prec_opt = cond_precision - conjugate_arg = prec_opt.dot(cond_mean) + ##this is the O problem - solver = solve_barrier_affine_py + D = np.identity(target_parameter.shape[0]) + for i in range(self.ntask-1): + D = np.block([[D],[np.identity(target_parameter.shape[0])]]) - val, soln, hess = solver(conjugate_arg, - prec_opt, - init_soln, - linear_part, - offset, - step=1, - nstep=5000, - min_its=3000, - tol=1.e-12) + par = D.dot(target_parameter) + mean_marginal = self.linear_coef.dot(par) + self.offset_coef + prec_marginal = self.prec_marginal + conjugate_marginal = prec_marginal.dot(mean_marginal) - prec_target = np.linalg.inv(cov_target) + val, soln, hess = solve_barrier_affine_py(conjugate_marginal, + prec_marginal, + self.observed_opt_states, + self.linear_con, + self.offset_con, + **self.solve_args) - target_lin = -logdens_linear.dot(cov_target_score.T.dot(prec_target)) + log_normalizer = -val - mean_marginal.T.dot(prec_marginal).dot(mean_marginal) / 2. - final_estimator = observed_target + cov_target.dot(target_lin.T.dot(prec_opt.dot(cond_mean - soln))) + ## based on this we now write the log-likelihood, gradient and hessian - L = target_lin.T.dot(prec_opt) - observed_info_natural = prec_target + L.dot(target_lin) - L.dot(hess.dot(L.T)) - observed_info_mean = cov_target.dot(observed_info_natural.dot(cov_target)) + log_lik = -((self.observed_target - par).T.dot(self.prec_target).dot(self.observed_target - par)) / 2. - log_normalizer - Z_scores = final_estimator / np.sqrt(np.diag(observed_info_mean)) - pvalues = ndist.cdf(Z_scores) - pvalues = 2 * np.minimum(pvalues, 1 - pvalues) + grad_lik = D.T.dot(self.prec_target.dot(self.observed_target) - + self.prec_target.dot(par) \ + - self.linear_coef.T.dot(prec_marginal.dot(soln)- conjugate_marginal)) - alpha = 1. - level - quantile = ndist.ppf(1 - alpha / 2.) - intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)), - final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T + hess_lik = D.T.dot(-self.prec_target + + self.linear_coef.T.dot(prec_marginal).dot(self.linear_coef)- + self.linear_coef.T.dot(prec_marginal).dot(hess).dot(prec_marginal).self.linear_coef).dot(D) - return final_estimator, observed_info_mean, Z_scores, pvalues, intervals + return log_lik, grad_lik, hess_lik def multitasking_target(self, dispersions=None, solve_args={'tol': 1.e-12, 'min_its': 50}): - observed_targets = {} - cov_targets = {} - crosscov_target_scores = {} + observed_targets = [] + cov_targets = np.array([]) + crosscov_target_scores = np.array([]) features = self.active_global @@ -354,55 +324,19 @@ def multitasking_target(self, Xfeat = X[:, features] Qfeat = Xfeat.T.dot(W[:, None] * Xfeat) - observed_targets[i] = restricted_estimator(self.loglikes[i], features, solve_args=solve_args) + observed_target = restricted_estimator(self.loglikes[i], features, solve_args=solve_args) + observed_targets.extend(observed_target) cov_target = np.linalg.inv(Qfeat) _score_linear = -Xfeat.T.dot(W[:, None] * X).T crosscov_target_score = _score_linear.dot(cov_target) - if dispersions is None: # use Pearson's X^2 - dispersion = ((y - self.loglikes[i].saturated_loss.mean_function(Xfeat.dot(observed_targets[i]))) ** 2 / W).sum() / (n - Xfeat.shape[1]) + if dispersions is None: + dispersion = ((y - self.loglikes[i].saturated_loss.mean_function(Xfeat.dot(observed_target))) ** 2 / W).sum() / (n - Xfeat.shape[1]) else: dispersion = dispersions[i] - crosscov_target_scores[i] = crosscov_target_score.T * dispersion - cov_targets[i] = cov_target * dispersion - - return (observed_targets, - cov_targets, - crosscov_target_scores) - - def selected_targets(self, - dispersions=None, - solve_args={'tol': 1.e-12, 'min_its': 50}): - - observed_targets = [] - cov_targets = np.array([]) - crosscov_target_scores = np.array([]) - - for j in range(self.ntask): - - X, y = self.loglikes[j].data - n, p = X.shape - features = self._active[:, j] - W = self.loglikes[j].saturated_loss.hessian(X.dot(self.beta_bar[:, j])) - - Xfeat = X[:, features] - Qfeat = Xfeat.T.dot(W[:, None] * Xfeat) - - observed_target = restricted_estimator(self.loglikes[j], features, solve_args=solve_args) - cov_target = np.linalg.inv(Qfeat) - _score_linear = -Xfeat.T.dot(W[:, None] * X).T - - crosscov_target_score = _score_linear.dot(cov_target) - - if dispersions is None: # use Pearson's X^2 - dispersion = ((y - self.loglikes[j].saturated_loss.mean_function(Xfeat.dot(observed_target))) ** 2 / W).sum() / (n - Xfeat.shape[1]) - else: - dispersion = dispersions[j] - - observed_targets.extend(observed_target) crosscov_target_scores = block_diag(crosscov_target_scores, crosscov_target_score.T * dispersion) cov_targets = block_diag(cov_targets, cov_target * dispersion) diff --git a/selectinf/randomized/tests/test_multitask_lasso.py b/selectinf/randomized/tests/test_multitask_lasso.py index 1e28ba343..44be1c934 100644 --- a/selectinf/randomized/tests/test_multitask_lasso.py +++ b/selectinf/randomized/tests/test_multitask_lasso.py @@ -72,6 +72,10 @@ def test_multitask_lasso(ntask=2, randomizer_scales = randomizer_scales) active_signs = multi_lasso.fit() + ###check new target###### + + multi_lasso.multitasking_target() + dispersions = np.array([np.linalg.norm(response_vars[i] - predictor_vars[i].dot(np.linalg.pinv(predictor_vars[i]).dot(response_vars[i]))) ** 2 / (nsamples[i] - p) for i in range(ntask)]) From e3681f058630e8fe8bc2006e7db09784e1cc2f62 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Sun, 16 Aug 2020 18:55:13 -0400 Subject: [PATCH 16/26] add solver for mle --- selectinf/randomized/multitask_lasso.py | 59 +++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/selectinf/randomized/multitask_lasso.py b/selectinf/randomized/multitask_lasso.py index 7d41a356f..3034d3d56 100644 --- a/selectinf/randomized/multitask_lasso.py +++ b/selectinf/randomized/multitask_lasso.py @@ -268,6 +268,9 @@ def _set_marginal_parameters(self, self.cov_marginal = implied_cov[ntarget:][:, ntarget:] self.prec_marginal = np.linalg.inv(self.cov_marginal) + self.observed_target = observed_target + self.prec_target = prec_target + def multitasking_likelihood(self, target_parameter): @@ -303,8 +306,63 @@ def multitasking_likelihood(self, self.linear_coef.T.dot(prec_marginal).dot(self.linear_coef)- self.linear_coef.T.dot(prec_marginal).dot(hess).dot(prec_marginal).self.linear_coef).dot(D) + self.initial_estimate_mle = D.dot(self.observed_target) + return log_lik, grad_lik, hess_lik + def minimize_lik(self, + step=1, + nstep=5000, + min_its=1000, + tol=1.e-12): + + current = self.initial_estimate_mle + current_value = np.inf + + log_lik_current, grad_lik_current, hess_lik_current = self.multitasking_likelihood(current) + + for itercount in range(nstep): + + newton_step = -grad_lik_current + + count = 0 + + while True: + count += 1 + proposal = current - step * newton_step + + log_lik_proposal, grad_lik_proposal, hess_lik_proposal = self.multitasking_likelihood(proposal) + proposed_value = -log_lik_proposal + + if proposed_value <= current_value: + break + step *= 0.5 + + if count >= 20: + if not (np.isnan(proposed_value) or np.isnan(current_value)): + break + else: + raise ValueError('value is NaN: %f, %f' % (proposed_value, current_value)) + + if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value) and itercount >= min_its: + + current = proposal + current_value = proposed_value + + grad_lik_current = grad_lik_proposal + hess_lik_current = hess_lik_proposal + break + + current = proposal + current_value = proposed_value + grad_lik_current = grad_lik_proposal + hess_lik_current = hess_lik_proposal + + if itercount % 4 == 0: + step *= 2 + + return current_value, current, -hess_lik_current + def multitasking_target(self, dispersions=None, solve_args={'tol': 1.e-12, 'min_its': 50}): @@ -453,6 +511,7 @@ def solve_barrier_affine_py(conjugate_arg, nstep=5000, min_its=1000, tol=1.e-12): + scaling = np.sqrt(np.diag(con_linear.dot(precision).dot(con_linear.T))) if feasible_point is None: From d73fe46fe8ddbcb77f05eb03fcfba66f1e96b078 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Mon, 17 Aug 2020 22:08:46 -0400 Subject: [PATCH 17/26] updated test --- selectinf/randomized/multitask_lasso.py | 67 ++++++++++++------- .../randomized/tests/test_multitask_lasso.py | 35 +++++----- 2 files changed, 61 insertions(+), 41 deletions(-) diff --git a/selectinf/randomized/multitask_lasso.py b/selectinf/randomized/multitask_lasso.py index 3034d3d56..c2fb8b6da 100644 --- a/selectinf/randomized/multitask_lasso.py +++ b/selectinf/randomized/multitask_lasso.py @@ -238,7 +238,7 @@ def _setup_implied_gaussian(self): def _set_marginal_parameters(self, dispersions= None): - observed_target, cov_target, cov_target_score = self.multitasking_target(dispersions) + observed_target, cov_target, cov_target_score = self.multitasking_target(dispersions = dispersions) ntarget = observed_target.shape[0] prec_target = np.linalg.inv(cov_target) @@ -270,17 +270,24 @@ def _set_marginal_parameters(self, self.observed_target = observed_target self.prec_target = prec_target - + + D = np.identity(self.active_global.shape[0]) + for i in range(self.ntask - 1): + D = np.block([[D], [np.identity(self.active_global.shape[0])]]) + + + self.W_coef = np.linalg.inv(D.T.dot(self.prec_target).dot(D)).dot(D.T.dot(self.prec_target)) + + self.initial_estimate_mle = self.W_coef.dot(self.observed_target) + + self.D = D + def multitasking_likelihood(self, target_parameter): ##this is the O problem - D = np.identity(target_parameter.shape[0]) - for i in range(self.ntask-1): - D = np.block([[D],[np.identity(target_parameter.shape[0])]]) - - par = D.dot(target_parameter) + par = self.D.dot(target_parameter) mean_marginal = self.linear_coef.dot(par) + self.offset_coef prec_marginal = self.prec_marginal conjugate_marginal = prec_marginal.dot(mean_marginal) @@ -290,7 +297,10 @@ def multitasking_likelihood(self, self.observed_opt_states, self.linear_con, self.offset_con, - **self.solve_args) + step=1, + nstep=5000, + min_its=1000, + tol=1.e-12) log_normalizer = -val - mean_marginal.T.dot(prec_marginal).dot(mean_marginal) / 2. @@ -298,23 +308,23 @@ def multitasking_likelihood(self, log_lik = -((self.observed_target - par).T.dot(self.prec_target).dot(self.observed_target - par)) / 2. - log_normalizer - grad_lik = D.T.dot(self.prec_target.dot(self.observed_target) - - self.prec_target.dot(par) \ - - self.linear_coef.T.dot(prec_marginal.dot(soln)- conjugate_marginal)) + grad_lik = self.D.T.dot(self.prec_target.dot(self.observed_target) - + self.prec_target.dot(par)- self.linear_coef.T.dot(prec_marginal.dot(soln)- conjugate_marginal)) - hess_lik = D.T.dot(-self.prec_target + - self.linear_coef.T.dot(prec_marginal).dot(self.linear_coef)- - self.linear_coef.T.dot(prec_marginal).dot(hess).dot(prec_marginal).self.linear_coef).dot(D) - - self.initial_estimate_mle = D.dot(self.observed_target) + hess_lik = self.D.T.dot(-self.prec_target + self.linear_coef.T.dot(prec_marginal).dot(self.linear_coef)- + self.linear_coef.T.dot(prec_marginal).dot(hess).dot(prec_marginal.dot(self.linear_coef))).dot(self.D) return log_lik, grad_lik, hess_lik - def minimize_lik(self, - step=1, - nstep=5000, - min_its=1000, - tol=1.e-12): + def multitask_inference(self, + dispersions=None, + step=1, + nstep = 1000, + min_its = 200, + tol = 1.e-8, + level = 0.9): + + self._set_marginal_parameters(dispersions=dispersions) current = self.initial_estimate_mle current_value = np.inf @@ -322,7 +332,6 @@ def minimize_lik(self, log_lik_current, grad_lik_current, hess_lik_current = self.multitasking_likelihood(current) for itercount in range(nstep): - newton_step = -grad_lik_current count = 0 @@ -361,7 +370,19 @@ def minimize_lik(self, if itercount % 4 == 0: step *= 2 - return current_value, current, -hess_lik_current + final_estimator = current + observed_info_mean = np.linalg.inv(-hess_lik_current) + + Z_scores = final_estimator / np.sqrt(np.diag(observed_info_mean)) + pvalues = ndist.cdf(Z_scores) + pvalues = 2 * np.minimum(pvalues, 1 - pvalues) + + alpha = 1. - level + quantile = ndist.ppf(1 - alpha / 2.) + intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)), + final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T + + return final_estimator, observed_info_mean, Z_scores, pvalues, intervals def multitasking_target(self, dispersions=None, diff --git a/selectinf/randomized/tests/test_multitask_lasso.py b/selectinf/randomized/tests/test_multitask_lasso.py index 44be1c934..abd5db4a8 100644 --- a/selectinf/randomized/tests/test_multitask_lasso.py +++ b/selectinf/randomized/tests/test_multitask_lasso.py @@ -70,24 +70,24 @@ def test_multitask_lasso(ntask=2, feature_weight, ridge_term = None, randomizer_scales = randomizer_scales) - active_signs = multi_lasso.fit() + multi_lasso.fit() - ###check new target###### + # dispersions = np.array([np.linalg.norm(response_vars[i] - + # predictor_vars[i].dot(np.linalg.pinv(predictor_vars[i]).dot(response_vars[i]))) ** 2 / (nsamples[i] - p) + # for i in range(ntask)]) - multi_lasso.multitasking_target() + dispersions = sigma - dispersions = np.array([np.linalg.norm(response_vars[i] - - predictor_vars[i].dot(np.linalg.pinv(predictor_vars[i]).dot(response_vars[i]))) ** 2 / (nsamples[i] - p) - for i in range(ntask)]) + estimate, observed_info_mean, _, _, intervals = multi_lasso.multitask_inference(dispersions=dispersions) - estimate, observed_info_mean, _, _, intervals = multi_lasso.selective_MLE(dispersions= dispersions) - - beta_target = [] + beta_target_ = [] for j in range(ntask): - beta_target.extend(np.linalg.pinv((predictor_vars[j])[:, (active_signs[:, j] != 0)]).dot(predictor_vars[j].dot(beta[:,j]))) + beta_target_.extend(np.linalg.pinv((predictor_vars[j])[:, multi_lasso.active_global]).dot(predictor_vars[j].dot(beta[:,j]))) + + beta_target_ = np.asarray(beta_target_) + beta_target = multi_lasso.W_coef.dot(beta_target_) - beta_target = np.asarray(beta_target) print("check size range ", np.amax(np.fabs(beta_target)), np.amin(np.fabs(beta_target))) coverage = (beta_target > intervals[:, 0]) * (beta_target < @@ -105,15 +105,15 @@ def test_coverage(nsim=100): for n in range(nsim): - coverage, length = test_multitask_lasso(ntask=5, - nsamples=500 * np.ones(5), + coverage, length = test_multitask_lasso(ntask=3, + nsamples=500 * np.ones(3), p=50, global_sparsity=.95, task_sparsity=0.20, - sigma=1.*np.ones(5), + sigma=1.*np.ones(3), signal_fac=1., - rhos=0.50*np.ones(5), - weight=1.2) + rhos=0.50*np.ones(3), + weight=1.) if coverage.sum()<= 0.10: break @@ -126,5 +126,4 @@ def test_coverage(nsim=100): print("length so far ", np.mean(np.asarray(length))) if __name__ == "__main__": - - test_coverage(nsim=100) \ No newline at end of file + test_coverage(nsim=10) \ No newline at end of file From d6acc3034e1d36dfea3ffed94d1971e05b126c6c Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Mon, 17 Aug 2020 22:33:19 -0400 Subject: [PATCH 18/26] commit before switch --- selectinf/randomized/multitask_lasso.py | 2 +- selectinf/randomized/tests/test_multitask_lasso.py | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/selectinf/randomized/multitask_lasso.py b/selectinf/randomized/multitask_lasso.py index c2fb8b6da..e2b7b63e4 100644 --- a/selectinf/randomized/multitask_lasso.py +++ b/selectinf/randomized/multitask_lasso.py @@ -304,7 +304,7 @@ def multitasking_likelihood(self, log_normalizer = -val - mean_marginal.T.dot(prec_marginal).dot(mean_marginal) / 2. - ## based on this we now write the log-likelihood, gradient and hessian + ## based on the O problem, we now write the log-likelihood, its gradient and hessian log_lik = -((self.observed_target - par).T.dot(self.prec_target).dot(self.observed_target - par)) / 2. - log_normalizer diff --git a/selectinf/randomized/tests/test_multitask_lasso.py b/selectinf/randomized/tests/test_multitask_lasso.py index abd5db4a8..f099d2e4b 100644 --- a/selectinf/randomized/tests/test_multitask_lasso.py +++ b/selectinf/randomized/tests/test_multitask_lasso.py @@ -115,9 +115,6 @@ def test_coverage(nsim=100): rhos=0.50*np.ones(3), weight=1.) - if coverage.sum()<= 0.10: - break - cov.extend(coverage) len.extend(length) @@ -126,4 +123,4 @@ def test_coverage(nsim=100): print("length so far ", np.mean(np.asarray(length))) if __name__ == "__main__": - test_coverage(nsim=10) \ No newline at end of file + test_coverage(nsim=50) \ No newline at end of file From 52669e8e2828c584347464da9fc70a448d1b5de7 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Tue, 15 Sep 2020 12:10:38 -0400 Subject: [PATCH 19/26] commit changes to multi-task Lasso so far --- selectinf/randomized/tests/test_multitask_lasso.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/selectinf/randomized/tests/test_multitask_lasso.py b/selectinf/randomized/tests/test_multitask_lasso.py index f099d2e4b..a4cca2814 100644 --- a/selectinf/randomized/tests/test_multitask_lasso.py +++ b/selectinf/randomized/tests/test_multitask_lasso.py @@ -105,14 +105,14 @@ def test_coverage(nsim=100): for n in range(nsim): - coverage, length = test_multitask_lasso(ntask=3, - nsamples=500 * np.ones(3), + coverage, length = test_multitask_lasso(ntask=4, + nsamples=500 * np.ones(4), p=50, global_sparsity=.95, - task_sparsity=0.20, - sigma=1.*np.ones(3), - signal_fac=1., - rhos=0.50*np.ones(3), + task_sparsity=0.50, + sigma=1*np.ones(4), + signal_fac=0.2, + rhos=0.50*np.ones(4), weight=1.) cov.extend(coverage) From 7a5b9eba276826ae4786299640e58339fd237661 Mon Sep 17 00:00:00 2001 From: snigdhagit Date: Thu, 24 Sep 2020 17:57:11 -0400 Subject: [PATCH 20/26] quick fix on cons: need check --- selectinf/randomized/multitask_lasso.py | 24 ++++++++++++++++--- .../randomized/tests/test_multitask_lasso.py | 2 +- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/selectinf/randomized/multitask_lasso.py b/selectinf/randomized/multitask_lasso.py index e2b7b63e4..191a181af 100644 --- a/selectinf/randomized/multitask_lasso.py +++ b/selectinf/randomized/multitask_lasso.py @@ -207,15 +207,33 @@ def signed_basis_vector(p, j, s): ##forming linear constraints on our optimization variables - self.linear_con = -np.identity(opt_vars) - self.offset_con = np.zeros(opt_vars) + ##definining the truncation on sums of our opt variables for each j + + con_vec = self.seltasks[self.seltasks-1 > 0] -1 + _len_var = np.cumsum(con_vec) + + _con_sum = np.zeros((_len_var.shape[0], opt_vars)) + _con_trunc = (observed_opt_states_[opt_vars:])[self.seltasks-1 > 0] + + for j in range(_len_var.shape[0]): + + if j==0: + (_con_sum[j, :])[:_len_var[j]] = 1 + elif j== (_len_var[-1]-1): + (_con_sum[j, :])[(_len_var[j-1]+1):]=1 + else: + (_con_sum[j, :])[_len_var[j-1]:_len_var[j]]=1 + + self.linear_con = np.vstack((-np.identity(opt_vars), _con_sum)) + self.offset_con = np.append(np.zeros(opt_vars), _con_trunc) self.opt_linears = opt_linears self.opt_offsets = opt_offsets self.observed_opt_states = observed_opt_states self.observed_score_states = scores - print("check signs of observed opt_states ", ((self.linear_con.dot(observed_opt_states)-self.offset_con)<0).sum(), opt_vars, self.seltasks) + print("check signs of observed opt_states ", ((self.linear_con.dot(observed_opt_states)-self.offset_con)<0).sum(), self.linear_con.shape[0], + opt_vars, self.seltasks.sum()) return active_signs diff --git a/selectinf/randomized/tests/test_multitask_lasso.py b/selectinf/randomized/tests/test_multitask_lasso.py index a4cca2814..b65ee2d31 100644 --- a/selectinf/randomized/tests/test_multitask_lasso.py +++ b/selectinf/randomized/tests/test_multitask_lasso.py @@ -123,4 +123,4 @@ def test_coverage(nsim=100): print("length so far ", np.mean(np.asarray(length))) if __name__ == "__main__": - test_coverage(nsim=50) \ No newline at end of file + test_coverage(nsim=1) \ No newline at end of file From 93a297266ab7cb6b74b3cb37275fef73bd86c345 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Sun, 10 Jan 2021 13:28:47 -0500 Subject: [PATCH 21/26] added inference for heterogeneous model --- selectinf/randomized/multitask_lasso.py | 124 ++++++++++++--- .../randomized/tests/test_multitask_lasso.py | 150 +++++++++++------- 2 files changed, 199 insertions(+), 75 deletions(-) diff --git a/selectinf/randomized/multitask_lasso.py b/selectinf/randomized/multitask_lasso.py index 191a181af..e0dbb828b 100644 --- a/selectinf/randomized/multitask_lasso.py +++ b/selectinf/randomized/multitask_lasso.py @@ -35,10 +35,10 @@ def fit(self, p = self.nfeature - ## solve multitasking problem + ## solve multitask problem (self.initial_solns, - self.initial_subgrads) = self._solve_multitasking_problem(perturbations=perturbations) + self.initial_subgrads) = self._solve_multitask_problem(perturbations=perturbations) ##setting up some initial objects to form our K.K.T map which loops over the K regression tasks @@ -256,7 +256,7 @@ def _setup_implied_gaussian(self): def _set_marginal_parameters(self, dispersions= None): - observed_target, cov_target, cov_target_score = self.multitasking_target(dispersions = dispersions) + observed_target, cov_target, cov_target_score = self.multitask_target_global(dispersions = dispersions) ntarget = observed_target.shape[0] prec_target = np.linalg.inv(cov_target) @@ -300,8 +300,8 @@ def _set_marginal_parameters(self, self.D = D - def multitasking_likelihood(self, - target_parameter): + def multitask_likelihood_global(self, + target_parameter): ##this is the O problem @@ -334,20 +334,70 @@ def multitasking_likelihood(self, return log_lik, grad_lik, hess_lik - def multitask_inference(self, - dispersions=None, - step=1, - nstep = 1000, - min_its = 200, - tol = 1.e-8, - level = 0.9): + def multitask_inference_hetero(self, + solve_args={'tol': 1.e-12}, + level=0.9, + dispersions=None): + + observed_target, cov_target, cov_target_score = self.multitask_target_hetero(dispersions) + + cond_mean, cond_cov, cond_precision, logdens_linear = self._setup_implied_gaussian() + + init_soln = self.observed_opt_states + + linear_part = self.linear_con + offset = self.offset_con + + prec_opt = cond_precision + conjugate_arg = prec_opt.dot(cond_mean) + + solver = solve_barrier_affine_py + + val, soln, hess = solver(conjugate_arg, + prec_opt, + init_soln, + linear_part, + offset, + step=1, + nstep=5000, + min_its=3000, + tol=1.e-12) + + prec_target = np.linalg.inv(cov_target) + + target_lin = -logdens_linear.dot(cov_target_score.T.dot(prec_target)) + + final_estimator = observed_target + cov_target.dot(target_lin.T.dot(prec_opt.dot(cond_mean - soln))) + + L = target_lin.T.dot(prec_opt) + observed_info_natural = prec_target + L.dot(target_lin) - L.dot(hess.dot(L.T)) + observed_info_mean = cov_target.dot(observed_info_natural.dot(cov_target)) + + Z_scores = final_estimator / np.sqrt(np.diag(observed_info_mean)) + pvalues = ndist.cdf(Z_scores) + pvalues = 2 * np.minimum(pvalues, 1 - pvalues) + + alpha = 1. - level + quantile = ndist.ppf(1 - alpha / 2.) + intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)), + final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T + + return final_estimator, observed_info_mean, Z_scores, pvalues, intervals + + def multitask_inference_global(self, + dispersions=None, + step=1, + nstep = 1000, + min_its = 200, + tol = 1.e-8, + level = 0.9): self._set_marginal_parameters(dispersions=dispersions) current = self.initial_estimate_mle current_value = np.inf - log_lik_current, grad_lik_current, hess_lik_current = self.multitasking_likelihood(current) + log_lik_current, grad_lik_current, hess_lik_current = self.multitask_likelihood_global(current) for itercount in range(nstep): newton_step = -grad_lik_current @@ -358,7 +408,7 @@ def multitask_inference(self, count += 1 proposal = current - step * newton_step - log_lik_proposal, grad_lik_proposal, hess_lik_proposal = self.multitasking_likelihood(proposal) + log_lik_proposal, grad_lik_proposal, hess_lik_proposal = self.multitask_likelihood_global(proposal) proposed_value = -log_lik_proposal if proposed_value <= current_value: @@ -402,9 +452,9 @@ def multitask_inference(self, return final_estimator, observed_info_mean, Z_scores, pvalues, intervals - def multitasking_target(self, - dispersions=None, - solve_args={'tol': 1.e-12, 'min_its': 50}): + def multitask_target_global(self, + dispersions=None, + solve_args={'tol': 1.e-12, 'min_its': 50}): observed_targets = [] cov_targets = np.array([]) @@ -441,6 +491,44 @@ def multitasking_target(self, cov_targets[1:, :], crosscov_target_scores[1:, :]) + def multitask_target_hetero(self, + dispersions=None, + solve_args={'tol': 1.e-12, 'min_its': 50}): + + observed_targets = [] + cov_targets = np.array([]) + crosscov_target_scores = np.array([]) + + for j in range(self.ntask): + + X, y = self.loglikes[j].data + n, p = X.shape + features = self._active[:, j] + W = self.loglikes[j].saturated_loss.hessian(X.dot(self.beta_bar[:, j])) + + Xfeat = X[:, features] + Qfeat = Xfeat.T.dot(W[:, None] * Xfeat) + + observed_target = restricted_estimator(self.loglikes[j], features, solve_args=solve_args) + cov_target = np.linalg.inv(Qfeat) + _score_linear = -Xfeat.T.dot(W[:, None] * X).T + + crosscov_target_score = _score_linear.dot(cov_target) + + if dispersions is None: # use Pearson's X^2 + dispersion = ((y - self.loglikes[j].saturated_loss.mean_function( + Xfeat.dot(observed_target))) ** 2 / W).sum() / (n - Xfeat.shape[1]) + else: + dispersion = dispersions[j] + + observed_targets.extend(observed_target) + crosscov_target_scores = block_diag(crosscov_target_scores, crosscov_target_score.T * dispersion) + cov_targets = block_diag(cov_targets, cov_target * dispersion) + + return (np.asarray(observed_targets), + cov_targets[1:, :], + crosscov_target_scores[1:, :]) + def _solve_randomized_problem(self, penalty, solve_args={'tol': 1.e-12, 'min_its': 50}): @@ -461,7 +549,7 @@ def _solve_randomized_problem(self, return initial_solns, initial_subgrads - def _solve_multitasking_problem(self, perturbations=None, num_iter=1000, atol=1.e-5): + def _solve_multitask_problem(self, perturbations=None, num_iter=1000, atol=1.e-5): if perturbations is not None: self._initial_omega = perturbations diff --git a/selectinf/randomized/tests/test_multitask_lasso.py b/selectinf/randomized/tests/test_multitask_lasso.py index b65ee2d31..ca73abdfe 100644 --- a/selectinf/randomized/tests/test_multitask_lasso.py +++ b/selectinf/randomized/tests/test_multitask_lasso.py @@ -4,44 +4,15 @@ from ..multitask_lasso import multi_task_lasso from ...tests.instance import gaussian_multitask_instance -def main(): - - K = 4 - sample_sizes = (200, 200, 200, 200) - p = 10 - beta = np.random.random((p, K)) - - global_sparsity_rate = .90 - task_sparsity_rate = .50 - global_zeros = np.random.choice(p,int(round(global_sparsity_rate*p))) - - beta[global_zeros,:] = np.zeros((K,)) - for i in np.delete(range(p),global_zeros): - beta[i,np.random.choice(K,int(round(task_sparsity_rate * K)))] = 0. - print("beta ", beta) - - predictor_vars = {i: np.random.random((sample_sizes[i], p)) for i in range(K)} - response_vars = {i: np.dot(predictor_vars[i], beta[:, i]) for i in range(K)} - feature_weight = 1.25 * np.ones(p) - randomizer_scales = np.ones(K) - - multi_lasso = multi_task_lasso.gaussian(predictor_vars, - response_vars, - feature_weight, - randomizer_scales = randomizer_scales) - - print(multi_lasso.fit()) - - -def test_multitask_lasso(ntask=2, - nsamples=500 * np.ones(2), - p=100, - global_sparsity=.8, - task_sparsity=.3, - sigma=1.*np.ones(2), - signal_fac= 0.5, - rhos=0.*np.ones(2), - weight=2.): +def test_multitask_lasso_global(ntask=2, + nsamples=500 * np.ones(2), + p=100, + global_sparsity=.8, + task_sparsity=.3, + sigma=1.*np.ones(2), + signal_fac= 0.5, + rhos=0.*np.ones(2), + weight=2.): nsamples = nsamples.astype(int) @@ -68,17 +39,17 @@ def test_multitask_lasso(ntask=2, multi_lasso = multi_task_lasso.gaussian(predictor_vars, response_vars, feature_weight, - ridge_term = None, - randomizer_scales = randomizer_scales) + ridge_term=None, + randomizer_scales=randomizer_scales) multi_lasso.fit() # dispersions = np.array([np.linalg.norm(response_vars[i] - # predictor_vars[i].dot(np.linalg.pinv(predictor_vars[i]).dot(response_vars[i]))) ** 2 / (nsamples[i] - p) # for i in range(ntask)]) - dispersions = sigma + dispersions = sigma ** 2 - estimate, observed_info_mean, _, _, intervals = multi_lasso.multitask_inference(dispersions=dispersions) + estimate, observed_info_mean, _, _, intervals = multi_lasso.multitask_inference_global(dispersions=dispersions) beta_target_ = [] @@ -88,16 +59,65 @@ def test_multitask_lasso(ntask=2, beta_target_ = np.asarray(beta_target_) beta_target = multi_lasso.W_coef.dot(beta_target_) - print("check size range ", np.amax(np.fabs(beta_target)), np.amin(np.fabs(beta_target))) - coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) - print("coverage ", coverage) - if coverage.sum() <= 0.10: - print(intervals, multi_lasso.observed_opt_states, estimate) return coverage, intervals[:, 1]- intervals[:, 0] + +def test_multitask_lasso_hetero(ntask=2, + nsamples=500 * np.ones(2), + p=100, + global_sparsity=.8, + task_sparsity=.3, + sigma=1. * np.ones(2), + signal_fac=0.5, + rhos=0. * np.ones(2), + weight=2.): + + nsamples = nsamples.astype(int) + + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + response_vars, predictor_vars, beta = gaussian_multitask_instance(ntask, + nsamples, + p, + global_sparsity, + task_sparsity, + sigma, + signal, + rhos)[:3] + + feature_weight = weight * 1. * np.sqrt(2 * np.log(p)) * np.ones(p) + + sigmas_ = np.array([np.std(response_vars[i]) for i in range(ntask)]) + + randomizer_scales = 1. * sigmas_ + + multi_lasso = multi_task_lasso.gaussian(predictor_vars, + response_vars, + feature_weight, + ridge_term=None, + randomizer_scales=randomizer_scales) + active_signs = multi_lasso.fit() + + dispersions = sigma + + estimate, observed_info_mean, _, _, intervals = multi_lasso.multitask_inference_hetero(dispersions=dispersions) + + beta_target = [] + + for j in range(ntask): + beta_target.extend(np.linalg.pinv((predictor_vars[j])[:, (active_signs[:, j] != 0)]).dot(predictor_vars[j].dot(beta[:, j]))) + + beta_target = np.asarray(beta_target) + + coverage = (beta_target > intervals[:, 0]) * (beta_target < + intervals[:, 1]) + + return coverage, intervals[:, 1] - intervals[:, 0] + + def test_coverage(nsim=100): cov = [] @@ -105,15 +125,30 @@ def test_coverage(nsim=100): for n in range(nsim): - coverage, length = test_multitask_lasso(ntask=4, - nsamples=500 * np.ones(4), - p=50, - global_sparsity=.95, - task_sparsity=0.50, - sigma=1*np.ones(4), - signal_fac=0.2, - rhos=0.50*np.ones(4), - weight=1.) + ntask = 5 + + # coverage, length = test_multitask_lasso_global(ntask=ntask, + # nsamples=500 * np.ones(ntask), + # p=50, + # global_sparsity=.95, + # task_sparsity=0.50, + # sigma=1 * np.ones(ntask), + # signal_fac=0.2, + # rhos=0.30 * np.ones(ntask), + # weight=1.) + + coverage, length = test_multitask_lasso_hetero(ntask=ntask, + nsamples=3000 * np.ones(ntask), + p=100, + global_sparsity=.90, + task_sparsity=0.50, + sigma=1.*np.ones(ntask), + signal_fac=0.5, + rhos=0.20*np.ones(ntask), + weight=1.) + + if coverage.sum() <= 0.10: + break cov.extend(coverage) len.extend(length) @@ -123,4 +158,5 @@ def test_coverage(nsim=100): print("length so far ", np.mean(np.asarray(length))) if __name__ == "__main__": - test_coverage(nsim=1) \ No newline at end of file + + test_coverage(nsim=100) \ No newline at end of file From c28c2ea744a550cacabe10bc0d70f124ddd2341d Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 11 Jan 2021 15:43:44 -0500 Subject: [PATCH 22/26] fix dispersion bug --- selectinf/randomized/tests/test_multitask_lasso.py | 14 +++++++------- selectinf/tests/instance.py | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/selectinf/randomized/tests/test_multitask_lasso.py b/selectinf/randomized/tests/test_multitask_lasso.py index ca73abdfe..069e30908 100644 --- a/selectinf/randomized/tests/test_multitask_lasso.py +++ b/selectinf/randomized/tests/test_multitask_lasso.py @@ -27,7 +27,7 @@ def test_multitask_lasso_global(ntask=2, signal, rhos)[:3] - feature_weight = weight * 1. * np.sqrt(2 * np.log(p)) * np.ones(p) + feature_weight = weight * np.sqrt(2 * np.log(p)) * np.ones(p) sigmas_ = np.array([np.std(response_vars[i]) for i in range(ntask)]) @@ -88,11 +88,11 @@ def test_multitask_lasso_hetero(ntask=2, signal, rhos)[:3] - feature_weight = weight * 1. * np.sqrt(2 * np.log(p)) * np.ones(p) + feature_weight = weight * np.sqrt(2 * np.log(p)) * np.ones(p) sigmas_ = np.array([np.std(response_vars[i]) for i in range(ntask)]) - randomizer_scales = 1. * sigmas_ + randomizer_scales = 0.71 * sigmas_ multi_lasso = multi_task_lasso.gaussian(predictor_vars, response_vars, @@ -101,7 +101,7 @@ def test_multitask_lasso_hetero(ntask=2, randomizer_scales=randomizer_scales) active_signs = multi_lasso.fit() - dispersions = sigma + dispersions = sigma ** 2 estimate, observed_info_mean, _, _, intervals = multi_lasso.multitask_inference_hetero(dispersions=dispersions) @@ -140,11 +140,11 @@ def test_coverage(nsim=100): coverage, length = test_multitask_lasso_hetero(ntask=ntask, nsamples=3000 * np.ones(ntask), p=100, - global_sparsity=.90, + global_sparsity=.80, task_sparsity=0.50, sigma=1.*np.ones(ntask), - signal_fac=0.5, - rhos=0.20*np.ones(ntask), + signal_fac=np.array([0.2, 1.]), + rhos=0.30*np.ones(ntask), weight=1.) if coverage.sum() <= 0.10: diff --git a/selectinf/tests/instance.py b/selectinf/tests/instance.py index 77046852a..1965a7bc6 100644 --- a/selectinf/tests/instance.py +++ b/selectinf/tests/instance.py @@ -400,10 +400,10 @@ def gaussian_multitask_instance(ntask, if signal.shape == (1,): beta = float(signal[0]) * np.ones((p,ntask)) - global_nulls = np.random.choice(p, int(round(global_sparsity * p))) + global_nulls = np.random.choice(p, int(round(global_sparsity * p)), replace=False) beta[global_nulls, :] = np.zeros((ntask,)) for i in np.delete(range(p), global_nulls): - beta[i, np.random.choice(ntask, int(round(task_sparsity * ntask)))] = 0. + beta[i, np.random.choice(ntask, int(round(task_sparsity * ntask)), replace=False)] = 0. else: beta = np.ones((p,ntask)) From 9493cacc0c49263b996d4bd6e6fee0192803e19f Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Mon, 11 Jan 2021 22:21:20 -0500 Subject: [PATCH 23/26] some more clean up --- selectinf/randomized/multitask_lasso.py | 98 +++++++++---------- .../randomized/tests/test_multitask_lasso.py | 10 +- selectinf/tests/instance.py | 9 +- 3 files changed, 59 insertions(+), 58 deletions(-) diff --git a/selectinf/randomized/multitask_lasso.py b/selectinf/randomized/multitask_lasso.py index e0dbb828b..72f33a600 100644 --- a/selectinf/randomized/multitask_lasso.py +++ b/selectinf/randomized/multitask_lasso.py @@ -79,8 +79,8 @@ def fit(self, self.beta_bar = beta_bar seltasks = np.array([active[j,:].sum() for j in range(p)]) - self.active_global = np.asarray([j for j in range(p) if seltasks[j]>0]) - self.seltasks = seltasks[seltasks>0] + self.active_global = np.asarray([j for j in range(p) if seltasks[j] > 0]) + self.seltasks = seltasks[seltasks > 0] self.selection_variable = {'sign': active_signs.copy(), 'variables': ordered_variables} @@ -203,13 +203,13 @@ def signed_basis_vector(p, j, s): ##check K.K.T map - print("check K.K.T. map", np.allclose(omegas, scores + opt_linears.dot(observed_opt_states)+ opt_offsets, atol=1e-03)) + print("check K.K.T. map", np.allclose(omegas, scores + opt_linears.dot(observed_opt_states) + opt_offsets, atol=1e-05)) ##forming linear constraints on our optimization variables ##definining the truncation on sums of our opt variables for each j - con_vec = self.seltasks[self.seltasks-1 > 0] -1 + con_vec = self.seltasks[self.seltasks-1 > 0] - 1 _len_var = np.cumsum(con_vec) _con_sum = np.zeros((_len_var.shape[0], opt_vars)) @@ -220,9 +220,9 @@ def signed_basis_vector(p, j, s): if j==0: (_con_sum[j, :])[:_len_var[j]] = 1 elif j== (_len_var[-1]-1): - (_con_sum[j, :])[(_len_var[j-1]+1):]=1 + (_con_sum[j, :])[(_len_var[j-1]+1):] = 1 else: - (_con_sum[j, :])[_len_var[j-1]:_len_var[j]]=1 + (_con_sum[j, :])[_len_var[j-1]:_len_var[j]] = 1 self.linear_con = np.vstack((-np.identity(opt_vars), _con_sum)) self.offset_con = np.append(np.zeros(opt_vars), _con_trunc) @@ -232,7 +232,7 @@ def signed_basis_vector(p, j, s): self.observed_opt_states = observed_opt_states self.observed_score_states = scores - print("check signs of observed opt_states ", ((self.linear_con.dot(observed_opt_states)-self.offset_con)<0).sum(), self.linear_con.shape[0], + print("check signs of observed opt_states ", ((self.linear_con.dot(observed_opt_states)-self.offset_con) < 0).sum(), self.linear_con.shape[0], opt_vars, self.seltasks.sum()) return active_signs @@ -243,6 +243,7 @@ def _setup_implied_gaussian(self): for i in range(self.ntask): _, prec = self.randomizers[i].cov_prec precs = block_diag(precs, prec * np.identity(self.nfeature)) + precs = precs[1:, :] cond_precision = self.opt_linears.T.dot(precs.dot(self.opt_linears)) @@ -335,7 +336,6 @@ def multitask_likelihood_global(self, return log_lik, grad_lik, hess_lik def multitask_inference_hetero(self, - solve_args={'tol': 1.e-12}, level=0.9, dispersions=None): @@ -359,9 +359,9 @@ def multitask_inference_hetero(self, linear_part, offset, step=1, - nstep=5000, + nstep=8000, min_its=3000, - tol=1.e-12) + tol=1.e-15) prec_target = np.linalg.inv(cov_target) @@ -384,6 +384,44 @@ def multitask_inference_hetero(self, return final_estimator, observed_info_mean, Z_scores, pvalues, intervals + def multitask_target_hetero(self, + dispersions=None, + solve_args={'tol': 1.e-12, 'min_its': 50}): + + observed_targets = [] + cov_targets = np.array([]) + crosscov_target_scores = np.array([]) + + for j in range(self.ntask): + + X, y = self.loglikes[j].data + n, p = X.shape + features = self._active[:, j] + W = self.loglikes[j].saturated_loss.hessian(X.dot(self.beta_bar[:, j])) + + Xfeat = X[:, features] + Qfeat = Xfeat.T.dot(W[:, None] * Xfeat) + + observed_target = restricted_estimator(self.loglikes[j], features, solve_args=solve_args) + cov_target = np.linalg.inv(Qfeat) + _score_linear = -Xfeat.T.dot(W[:, None] * X).T + + crosscov_target_score = _score_linear.dot(cov_target) + + if dispersions is None: # use Pearson's X^2 + dispersion = ((y - self.loglikes[j].saturated_loss.mean_function( + Xfeat.dot(observed_target))) ** 2 / W).sum() / (n - Xfeat.shape[1]) + else: + dispersion = dispersions[j] + + observed_targets.extend(observed_target) + crosscov_target_scores = block_diag(crosscov_target_scores, crosscov_target_score.T * dispersion) + cov_targets = block_diag(cov_targets, cov_target * dispersion) + + return (np.asarray(observed_targets), + cov_targets[1:, :], + crosscov_target_scores[1:, :]) + def multitask_inference_global(self, dispersions=None, step=1, @@ -491,44 +529,6 @@ def multitask_target_global(self, cov_targets[1:, :], crosscov_target_scores[1:, :]) - def multitask_target_hetero(self, - dispersions=None, - solve_args={'tol': 1.e-12, 'min_its': 50}): - - observed_targets = [] - cov_targets = np.array([]) - crosscov_target_scores = np.array([]) - - for j in range(self.ntask): - - X, y = self.loglikes[j].data - n, p = X.shape - features = self._active[:, j] - W = self.loglikes[j].saturated_loss.hessian(X.dot(self.beta_bar[:, j])) - - Xfeat = X[:, features] - Qfeat = Xfeat.T.dot(W[:, None] * Xfeat) - - observed_target = restricted_estimator(self.loglikes[j], features, solve_args=solve_args) - cov_target = np.linalg.inv(Qfeat) - _score_linear = -Xfeat.T.dot(W[:, None] * X).T - - crosscov_target_score = _score_linear.dot(cov_target) - - if dispersions is None: # use Pearson's X^2 - dispersion = ((y - self.loglikes[j].saturated_loss.mean_function( - Xfeat.dot(observed_target))) ** 2 / W).sum() / (n - Xfeat.shape[1]) - else: - dispersion = dispersions[j] - - observed_targets.extend(observed_target) - crosscov_target_scores = block_diag(crosscov_target_scores, crosscov_target_score.T * dispersion) - cov_targets = block_diag(cov_targets, cov_target * dispersion) - - return (np.asarray(observed_targets), - cov_targets[1:, :], - crosscov_target_scores[1:, :]) - def _solve_randomized_problem(self, penalty, solve_args={'tol': 1.e-12, 'min_its': 50}): @@ -558,7 +558,7 @@ def _solve_multitask_problem(self, perturbations=None, num_iter=1000, atol=1.e-5 self._initial_omega = np.array([self.randomizers[i].sample() for i in range(self.ntask)]).T penalty_init = rr.weighted_l1norm(self.feature_weight, lagrange=1.) - solution_init = self._solve_randomized_problem(penalty= penalty_init) + solution_init = self._solve_randomized_problem(penalty=penalty_init) beta = solution_init[0].T for itercount in range(num_iter): diff --git a/selectinf/randomized/tests/test_multitask_lasso.py b/selectinf/randomized/tests/test_multitask_lasso.py index 069e30908..3e486e238 100644 --- a/selectinf/randomized/tests/test_multitask_lasso.py +++ b/selectinf/randomized/tests/test_multitask_lasso.py @@ -31,7 +31,7 @@ def test_multitask_lasso_global(ntask=2, sigmas_ = np.array([np.std(response_vars[i]) for i in range(ntask)]) - randomizer_scales = 0.71 * sigmas_ + randomizer_scales = 1. * sigmas_ #ridge_terms = np.array([np.std(response_vars[i]) * np.sqrt(np.mean((predictor_vars[i] ** 2).sum(0)))/ np.sqrt(nsamples[i] - 1) # for i in range(ntask)]) @@ -91,7 +91,7 @@ def test_multitask_lasso_hetero(ntask=2, feature_weight = weight * np.sqrt(2 * np.log(p)) * np.ones(p) sigmas_ = np.array([np.std(response_vars[i]) for i in range(ntask)]) - + #sigmas_ = sigma randomizer_scales = 0.71 * sigmas_ multi_lasso = multi_task_lasso.gaussian(predictor_vars, @@ -139,11 +139,11 @@ def test_coverage(nsim=100): coverage, length = test_multitask_lasso_hetero(ntask=ntask, nsamples=3000 * np.ones(ntask), - p=100, - global_sparsity=.80, + p=200, + global_sparsity=.90, task_sparsity=0.50, sigma=1.*np.ones(ntask), - signal_fac=np.array([0.2, 1.]), + signal_fac=np.array([0.2, 0.5]), rhos=0.30*np.ones(ntask), weight=1.) diff --git a/selectinf/tests/instance.py b/selectinf/tests/instance.py index 1965a7bc6..85ab5d43a 100644 --- a/selectinf/tests/instance.py +++ b/selectinf/tests/instance.py @@ -390,7 +390,8 @@ def gaussian_multitask_instance(ntask, equicorrelated=True): - predictor_vars = {i: _design(nsamples[i], p, rhos[i], equicorrelated)[0] for i in range(ntask)} + predictor_vars= {i: _design(nsamples[i], p, rhos[i], equicorrelated)[0] for i in range(ntask)} + cov_matrices = [_design(nsamples[i], p, rhos[i], equicorrelated)[1] for i in range(ntask)] if center: @@ -410,8 +411,8 @@ def gaussian_multitask_instance(ntask, nsignal = int(round(global_sparsity * p)) global_nulls = np.random.choice(p, nsignal, replace=False) beta[global_nulls, :] = np.zeros((ntask,)) + non_nulls = np.setdiff1d(np.arange(p), global_nulls) for j in range(ntask): - non_nulls = np.setdiff1d(np.arange(p), global_nulls) beta[non_nulls, j] = np.linspace(float(signal[0]), float(signal[1]), num=p-nsignal) for i in np.delete(range(p), global_nulls): @@ -425,7 +426,7 @@ def gaussian_multitask_instance(ntask, if scale: scalings = {i: predictor_vars[i].std(0) * np.sqrt(nsamples[i]) for i in range(ntask)} predictor_vars = {i: predictor_vars[i]/scalings[i][None, :] for i in range(ntask)} - beta *= np.sqrt(np.asarray(nsamples)) + beta *= [np.sqrt(nsamples[i]) for i in range(len(nsamples))] cov_matrices = {i: cov_matrices[i] / np.multiply.outer(scalings[i], scalings[i]) for i in range(ntask)} active = np.zeros((p, ntask), np.bool) @@ -439,6 +440,6 @@ def _noise(n, df=np.inf): sd_t = np.std(tdist.rvs(df, size=50000)) return tdist.rvs(df, size=n) / sd_t - response_vars = {i: (predictor_vars[i].dot(beta[:,i]) + _noise(nsamples[i], df)) * sigma[i] for i in range(ntask)} + response_vars = {i: (predictor_vars[i].dot(beta[:, i]) + _noise(nsamples[i], df)) * sigma[i] for i in range(ntask)} return response_vars, predictor_vars, beta * sigma, np.nonzero(active), sigma, cov_matrices \ No newline at end of file From cc8f6119dba2131b5f60ba46e273faf9c1f9ed87 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Fri, 29 Jan 2021 07:58:44 -0500 Subject: [PATCH 24/26] changes to instance and penalties --- selectinf/randomized/multitask_lasso.py | 356 +++++++++--------- .../randomized/tests/test_multitask_lasso.py | 320 ++++++++++++---- .../tests/test_multitask_lasso_pivot.py | 0 selectinf/tests/instance.py | 30 +- 4 files changed, 448 insertions(+), 258 deletions(-) create mode 100644 selectinf/randomized/tests/test_multitask_lasso_pivot.py diff --git a/selectinf/randomized/multitask_lasso.py b/selectinf/randomized/multitask_lasso.py index 72f33a600..67f8a28af 100644 --- a/selectinf/randomized/multitask_lasso.py +++ b/selectinf/randomized/multitask_lasso.py @@ -3,11 +3,10 @@ from scipy.stats import norm as ndist import regreg.api as rr -from .query import gaussian_query from .randomization import randomization from ..base import restricted_estimator -class multi_task_lasso(gaussian_query): +class multi_task_lasso(): def __init__(self, loglikes, @@ -16,7 +15,7 @@ def __init__(self, randomizers, nfeature, ntask, - perturbations=None): + perturbations): self.loglikes = loglikes @@ -38,7 +37,8 @@ def fit(self, ## solve multitask problem (self.initial_solns, - self.initial_subgrads) = self._solve_multitask_problem(perturbations=perturbations) + self.initial_subgrads, + penalty_weight) = self._solve_multitask_problem(perturbations=perturbations) ##setting up some initial objects to form our K.K.T map which loops over the K regression tasks @@ -78,7 +78,7 @@ def fit(self, self.beta_bar = beta_bar - seltasks = np.array([active[j,:].sum() for j in range(p)]) + seltasks = np.array([active[j, :].sum() for j in range(p)]) self.active_global = np.asarray([j for j in range(p) if seltasks[j] > 0]) self.seltasks = seltasks[seltasks > 0] @@ -130,7 +130,7 @@ def signed_basis_vector(p, j, s): Tau = np.delete(Tau, np.array(np.cumsum(self.seltasks) - 1), 0) Tau = np.vstack((Tau, A_)) - ##my final tranformation is o_new = Tau.dot(Psi).dot(Pi).dot(o) + ## final tranformation is v = Tau.dot(Psi).dot(Pi).dot(o) CoV = Tau.dot(Psi).dot(Pi) @@ -138,7 +138,7 @@ def signed_basis_vector(p, j, s): _score_linear_term = {} observed_score_state = {} - opt_offset = {i: self.initial_subgrads[:, i] for i in range(self.ntask)} + _opt_offset = {i: self.initial_subgrads[:, i] for i in range(self.ntask)} omegas = [] scores = [] @@ -148,7 +148,7 @@ def signed_basis_vector(p, j, s): for i in range(self.ntask): X, y = self.loglikes[i].data - W = self.loglikes[i].saturated_loss.hessian(X.dot(beta_bar[:,i])) + W = self.loglikes[i].saturated_loss.hessian(X.dot(beta_bar[:, i])) _hessian_active = np.dot(X.T, X[:, active[:, i]] * W[:, None]) _score_linear_term[i] = -_hessian_active @@ -159,8 +159,7 @@ def signed_basis_vector(p, j, s): active_directions = np.array([signed_basis_vector(p, k, - (active_signs[:,i])[k]) - for k in np.nonzero(active[:,i])[0]]).T + (active_signs[:,i])[k]) for k in np.nonzero(active[:,i])[0]]).T _opt_linear = np.zeros((p, num_opt_var[i])) scaling_slice = slice(0, active[:,i].sum()) @@ -176,7 +175,7 @@ def signed_basis_vector(p, j, s): scores.append(observed_score_state[i]) - opt_offsets_.append(opt_offset[i]) + opt_offsets_.append(_opt_offset[i]) observed_opt_states_.extend(initial_scalings[i]) @@ -201,10 +200,6 @@ def signed_basis_vector(p, j, s): observed_opt_states = observed_opt_states_[:opt_vars] - ##check K.K.T map - - print("check K.K.T. map", np.allclose(omegas, scores + opt_linears.dot(observed_opt_states) + opt_offsets, atol=1e-05)) - ##forming linear constraints on our optimization variables ##definining the truncation on sums of our opt variables for each j @@ -218,11 +213,9 @@ def signed_basis_vector(p, j, s): for j in range(_len_var.shape[0]): if j==0: - (_con_sum[j, :])[:_len_var[j]] = 1 - elif j== (_len_var[-1]-1): - (_con_sum[j, :])[(_len_var[j-1]+1):] = 1 + _con_sum[j, :][:_len_var[j]] = 1 else: - (_con_sum[j, :])[_len_var[j-1]:_len_var[j]] = 1 + _con_sum[j, :][_len_var[j-1]:_len_var[j]] = 1 self.linear_con = np.vstack((-np.identity(opt_vars), _con_sum)) self.offset_con = np.append(np.zeros(opt_vars), _con_trunc) @@ -232,8 +225,13 @@ def signed_basis_vector(p, j, s): self.observed_opt_states = observed_opt_states self.observed_score_states = scores - print("check signs of observed opt_states ", ((self.linear_con.dot(observed_opt_states)-self.offset_con) < 0).sum(), self.linear_con.shape[0], - opt_vars, self.seltasks.sum()) + self.omegas = omegas + self.active_signs = active_signs + + print("check signs of observed opt_states ", ((self.linear_con.dot(observed_opt_states)-self.offset_con) < 0).sum(), + self.linear_con.shape[0], opt_vars, observed_opt_states.shape[0], self.seltasks.sum()) + print("check K.K.T. map", + np.allclose(omegas, self.observed_score_states + self.opt_linears.dot(self.observed_opt_states) + self.opt_offsets, atol=1e-05)) return active_signs @@ -252,120 +250,42 @@ def _setup_implied_gaussian(self): cond_mean = -logdens_linear.dot(self.observed_score_states + self.opt_offsets) - return cond_mean, cond_cov, cond_precision, logdens_linear - - def _set_marginal_parameters(self, - dispersions= None): - - observed_target, cov_target, cov_target_score = self.multitask_target_global(dispersions = dispersions) - ntarget = observed_target.shape[0] - - prec_target = np.linalg.inv(cov_target) - - cond_mean, cond_cov, cond_precision, logdens_linear = self._setup_implied_gaussian() - nopt = cond_cov.shape[0] - - target_linear = -logdens_linear.dot(cov_target_score.T.dot(prec_target)) - - implied_precision = np.zeros((ntarget + nopt, ntarget + nopt)) - implied_precision[:ntarget][:, :ntarget] = (prec_target + target_linear.T.dot(cond_precision.dot(target_linear))) - implied_precision[:ntarget][:, ntarget:] = -target_linear.T.dot(cond_precision) - implied_precision[ntarget:][:, :ntarget] = (-target_linear.T.dot(cond_precision)).T - - implied_precision[ntarget:][:, ntarget:] = cond_precision - - implied_cov = np.linalg.inv(implied_precision) - - self.linear_coef = implied_cov[ntarget:][:, :ntarget].dot(prec_target) - - target_offset = cond_mean - target_linear.dot(observed_target) - M = implied_cov[ntarget:][:, ntarget:].dot(cond_precision.dot(target_offset)) - N = -target_linear.T.dot(cond_precision).dot(target_offset) - - self.offset_coef = implied_cov[ntarget:][:, :ntarget].dot(N) + M - - self.cov_marginal = implied_cov[ntarget:][:, ntarget:] - self.prec_marginal = np.linalg.inv(self.cov_marginal) - - self.observed_target = observed_target - self.prec_target = prec_target - - D = np.identity(self.active_global.shape[0]) - for i in range(self.ntask - 1): - D = np.block([[D], [np.identity(self.active_global.shape[0])]]) - - - self.W_coef = np.linalg.inv(D.T.dot(self.prec_target).dot(D)).dot(D.T.dot(self.prec_target)) - - self.initial_estimate_mle = self.W_coef.dot(self.observed_target) - - self.D = D - - def multitask_likelihood_global(self, - target_parameter): - - ##this is the O problem + self.cond_mean = cond_mean + self.cond_cov = cond_cov + self.cond_precision = cond_precision + self.logdens_linear = logdens_linear - par = self.D.dot(target_parameter) - mean_marginal = self.linear_coef.dot(par) + self.offset_coef - prec_marginal = self.prec_marginal - conjugate_marginal = prec_marginal.dot(mean_marginal) - - val, soln, hess = solve_barrier_affine_py(conjugate_marginal, - prec_marginal, - self.observed_opt_states, - self.linear_con, - self.offset_con, - step=1, - nstep=5000, - min_its=1000, - tol=1.e-12) - - log_normalizer = -val - mean_marginal.T.dot(prec_marginal).dot(mean_marginal) / 2. - - ## based on the O problem, we now write the log-likelihood, its gradient and hessian - - log_lik = -((self.observed_target - par).T.dot(self.prec_target).dot(self.observed_target - par)) / 2. - log_normalizer - - grad_lik = self.D.T.dot(self.prec_target.dot(self.observed_target) - - self.prec_target.dot(par)- self.linear_coef.T.dot(prec_marginal.dot(soln)- conjugate_marginal)) - - hess_lik = self.D.T.dot(-self.prec_target + self.linear_coef.T.dot(prec_marginal).dot(self.linear_coef)- - self.linear_coef.T.dot(prec_marginal).dot(hess).dot(prec_marginal.dot(self.linear_coef))).dot(self.D) - - return log_lik, grad_lik, hess_lik + return cond_mean, cond_cov, cond_precision, logdens_linear def multitask_inference_hetero(self, level=0.9, dispersions=None): - observed_target, cov_target, cov_target_score = self.multitask_target_hetero(dispersions) + self._setup_implied_gaussian() + observed_target, cov_target, cov_target_score = self.multitask_target_hetero(dispersions=dispersions) + prec_target = np.linalg.inv(cov_target) - cond_mean, cond_cov, cond_precision, logdens_linear = self._setup_implied_gaussian() + observed_target = np.atleast_1d(observed_target) init_soln = self.observed_opt_states + cond_mean = self.cond_mean + cond_precision = self.cond_precision + logdens_linear = self.logdens_linear - linear_part = self.linear_con - offset = self.offset_con + target_lin = -logdens_linear.dot(cov_target_score.T.dot(prec_target)) prec_opt = cond_precision conjugate_arg = prec_opt.dot(cond_mean) - solver = solve_barrier_affine_py - - val, soln, hess = solver(conjugate_arg, - prec_opt, - init_soln, - linear_part, - offset, - step=1, - nstep=8000, - min_its=3000, - tol=1.e-15) - - prec_target = np.linalg.inv(cov_target) - - target_lin = -logdens_linear.dot(cov_target_score.T.dot(prec_target)) + val, soln, hess = solve_barrier_affine_py(conjugate_arg, + prec_opt, + init_soln, + self.linear_con, + self.offset_con, + step=1., + nstep=10000, + min_its=5000, + tol=1.e-12) final_estimator = observed_target + cov_target.dot(target_lin.T.dot(prec_opt.dot(cond_mean - soln))) @@ -385,51 +305,69 @@ def multitask_inference_hetero(self, return final_estimator, observed_info_mean, Z_scores, pvalues, intervals def multitask_target_hetero(self, - dispersions=None, - solve_args={'tol': 1.e-12, 'min_its': 50}): + dispersions=None): observed_targets = [] + #ancillary_stats = [] cov_targets = np.array([]) crosscov_target_scores = np.array([]) - for j in range(self.ntask): + for i in range(self.ntask): - X, y = self.loglikes[j].data + X, y = self.loglikes[i].data n, p = X.shape - features = self._active[:, j] - W = self.loglikes[j].saturated_loss.hessian(X.dot(self.beta_bar[:, j])) + features = self._active[:, i] + W = self.loglikes[i].saturated_loss.hessian(X.dot(self.beta_bar[:, i])) Xfeat = X[:, features] Qfeat = Xfeat.T.dot(W[:, None] * Xfeat) - observed_target = restricted_estimator(self.loglikes[j], features, solve_args=solve_args) + observed_target = np.linalg.pinv(Xfeat).dot(y) + cov_target = np.linalg.inv(Qfeat) _score_linear = -Xfeat.T.dot(W[:, None] * X).T crosscov_target_score = _score_linear.dot(cov_target) if dispersions is None: # use Pearson's X^2 - dispersion = ((y - self.loglikes[j].saturated_loss.mean_function( + dispersion = ((y - self.loglikes[i].saturated_loss.mean_function( Xfeat.dot(observed_target))) ** 2 / W).sum() / (n - Xfeat.shape[1]) else: - dispersion = dispersions[j] + dispersion = dispersions[i] observed_targets.extend(observed_target) crosscov_target_scores = block_diag(crosscov_target_scores, crosscov_target_score.T * dispersion) cov_targets = block_diag(cov_targets, cov_target * dispersion) - return (np.asarray(observed_targets), - cov_targets[1:, :], - crosscov_target_scores[1:, :]) + return np.asarray(observed_targets), cov_targets[1:, :], crosscov_target_scores[1:, :] + + def _solve_randomized_problem(self, + penalty, + solve_args={'tol': 1.e-12, 'min_its': 50}): + + quad_list = [rr.identity_quadratic(self.ridge_terms[i], + 0, + -self._initial_omega[:, i], + 0) + for i in range(self.ntask)] + + problem_list = [rr.simple_problem(self.loglikes[i], penalty) for i in range(self.ntask)] + + initial_solns = np.array([problem_list[i].solve(quad_list[i], **solve_args) for i in range(self.ntask)]) + initial_subgrads = np.array([-(self.loglikes[i].smooth_objective(initial_solns[i, :], + 'grad') + + quad_list[i].objective(initial_solns[i, :], 'grad')) + for i in range(self.ntask)]) + + return initial_solns, initial_subgrads def multitask_inference_global(self, dispersions=None, step=1, - nstep = 1000, - min_its = 200, - tol = 1.e-8, - level = 0.9): - + nstep=1000, + min_its=200, + tol=1.e-8, + level=0.9): self._set_marginal_parameters(dispersions=dispersions) current = self.initial_estimate_mle @@ -460,7 +398,6 @@ def multitask_inference_global(self, raise ValueError('value is NaN: %f, %f' % (proposed_value, current_value)) if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value) and itercount >= min_its: - current = proposal current_value = proposed_value @@ -490,10 +427,10 @@ def multitask_inference_global(self, return final_estimator, observed_info_mean, Z_scores, pvalues, intervals - def multitask_target_global(self, - dispersions=None, - solve_args={'tol': 1.e-12, 'min_its': 50}): + def multitask_target_global(self, + dispersions=None, + solve_args={'tol': 1.e-12, 'min_its': 50}): observed_targets = [] cov_targets = np.array([]) crosscov_target_scores = np.array([]) @@ -518,7 +455,8 @@ def multitask_target_global(self, crosscov_target_score = _score_linear.dot(cov_target) if dispersions is None: - dispersion = ((y - self.loglikes[i].saturated_loss.mean_function(Xfeat.dot(observed_target))) ** 2 / W).sum() / (n - Xfeat.shape[1]) + dispersion = ((y - self.loglikes[i].saturated_loss.mean_function( + Xfeat.dot(observed_target))) ** 2 / W).sum() / (n - Xfeat.shape[1]) else: dispersion = dispersions[i] @@ -529,25 +467,87 @@ def multitask_target_global(self, cov_targets[1:, :], crosscov_target_scores[1:, :]) - def _solve_randomized_problem(self, - penalty, - solve_args={'tol': 1.e-12, 'min_its': 50}): + def _set_marginal_parameters(self, + dispersions=None): + observed_target, cov_target, cov_target_score = self.multitask_target_global(dispersions=dispersions) + ntarget = observed_target.shape[0] - quad_list = [rr.identity_quadratic(self.ridge_terms[i], - 0, - -self._initial_omega[:, i], - 0) - for i in range(self.ntask)] + prec_target = np.linalg.inv(cov_target) - problem_list = [rr.simple_problem(self.loglikes[i], penalty) for i in range(self.ntask)] + cond_mean, cond_cov, cond_precision, logdens_linear = self._setup_implied_gaussian() + nopt = cond_cov.shape[0] - initial_solns = np.array([problem_list[i].solve(quad_list[i], **solve_args) for i in range(self.ntask)]) - initial_subgrads = np.array([-(self.loglikes[i].smooth_objective(initial_solns[i, :], - 'grad') + - quad_list[i].objective(initial_solns[i, :], 'grad')) - for i in range(self.ntask)]) + target_linear = -logdens_linear.dot(cov_target_score.T.dot(prec_target)) - return initial_solns, initial_subgrads + implied_precision = np.zeros((ntarget + nopt, ntarget + nopt)) + implied_precision[:ntarget][:, :ntarget] = (prec_target + target_linear.T.dot(cond_precision.dot(target_linear))) + implied_precision[:ntarget][:, ntarget:] = -target_linear.T.dot(cond_precision) + implied_precision[ntarget:][:, :ntarget] = (-target_linear.T.dot(cond_precision)).T + + implied_precision[ntarget:][:, ntarget:] = cond_precision + + implied_cov = np.linalg.inv(implied_precision) + + self.linear_coef = implied_cov[ntarget:][:, :ntarget].dot(prec_target) + + target_offset = cond_mean - target_linear.dot(observed_target) + M = implied_cov[ntarget:][:, ntarget:].dot(cond_precision.dot(target_offset)) + N = -target_linear.T.dot(cond_precision).dot(target_offset) + + self.offset_coef = implied_cov[ntarget:][:, :ntarget].dot(N) + M + + self.cov_marginal = implied_cov[ntarget:][:, ntarget:] + self.prec_marginal = np.linalg.inv(self.cov_marginal) + + self.observed_target = observed_target + self.prec_target = prec_target + + D = np.identity(self.active_global.shape[0]) + for i in range(self.ntask - 1): + D = np.block([[D], [np.identity(self.active_global.shape[0])]]) + + self.W_coef = np.linalg.inv(D.T.dot(self.prec_target).dot(D)).dot(D.T.dot(self.prec_target)) + + self.initial_estimate_mle = self.W_coef.dot(self.observed_target) + + self.D = D + + + def multitask_likelihood_global(self, + target_parameter): + ##this is the O problem + + par = self.D.dot(target_parameter) + mean_marginal = self.linear_coef.dot(par) + self.offset_coef + prec_marginal = self.prec_marginal + conjugate_marginal = prec_marginal.dot(mean_marginal) + + val, soln, hess = solve_barrier_affine_py(conjugate_marginal, + prec_marginal, + self.observed_opt_states, + self.linear_con, + self.offset_con, + step=1, + nstep=5000, + min_its=1000, + tol=1.e-12) + + log_normalizer = -val - mean_marginal.T.dot(prec_marginal).dot(mean_marginal) / 2. + + ## based on the O problem, we now write the log-likelihood, its gradient and hessian + + log_lik = -( + (self.observed_target - par).T.dot(self.prec_target).dot(self.observed_target - par)) / 2. - log_normalizer + + grad_lik = self.D.T.dot(self.prec_target.dot(self.observed_target) - + self.prec_target.dot(par) - self.linear_coef.T.dot( + prec_marginal.dot(soln) - conjugate_marginal)) + + hess_lik = self.D.T.dot(-self.prec_target + self.linear_coef.T.dot(prec_marginal).dot(self.linear_coef) - + self.linear_coef.T.dot(prec_marginal).dot(hess).dot( + prec_marginal.dot(self.linear_coef))).dot(self.D) + + return log_lik, grad_lik, hess_lik def _solve_multitask_problem(self, perturbations=None, num_iter=1000, atol=1.e-5): @@ -564,6 +564,7 @@ def _solve_multitask_problem(self, perturbations=None, num_iter=1000, atol=1.e-5 for itercount in range(num_iter): beta_prev = beta.copy() + sum_all_tasks = np.sum(np.absolute(beta), axis=1) penalty_weight = 1. / np.maximum(np.sqrt(sum_all_tasks), 10 ** -10) @@ -579,11 +580,11 @@ def _solve_multitask_problem(self, perturbations=None, num_iter=1000, atol=1.e-5 if np.sum(np.fabs(beta_prev - beta)) < atol: break - print("check itercount ", itercount) + print("check itercount until convergence ", itercount) subgrad = solution_current[1].T - return beta, subgrad + return beta, subgrad, feature_weight_current @staticmethod def gaussian(predictor_vars, @@ -592,7 +593,8 @@ def gaussian(predictor_vars, noise_levels=None, quadratic=None, ridge_term=None, - randomizer_scales=None): + randomizer_scales=None, + perturbations=None): ntask = len(response_vars) @@ -627,7 +629,8 @@ def gaussian(predictor_vars, ridge_terms, randomizers, nfeature, - ntask) + ntask, + perturbations) def solve_barrier_affine_py(conjugate_arg, precision, @@ -635,23 +638,21 @@ def solve_barrier_affine_py(conjugate_arg, con_linear, con_offset, step=1, - nstep=5000, - min_its=1000, - tol=1.e-12): + nstep=1000, + min_its=200, + tol=1.e-10): scaling = np.sqrt(np.diag(con_linear.dot(precision).dot(con_linear.T))) if feasible_point is None: feasible_point = 1. / scaling - objective = lambda u: -u.T.dot(conjugate_arg) + u.T.dot(precision).dot(u) / 2. \ - + np.log(1. + 1. / ((con_offset - con_linear.dot(u)) / scaling)).sum() - grad = lambda u: -conjugate_arg + precision.dot(u) - con_linear.T.dot( - 1. / (scaling + con_offset - con_linear.dot(u)) - - 1. / (con_offset - con_linear.dot(u))) - barrier_hessian = lambda u: con_linear.T.dot(np.diag(-1. / ((scaling + con_offset - con_linear.dot(u)) ** 2.) - + 1. / ((con_offset - con_linear.dot(u)) ** 2.))).dot( - con_linear) + objective = lambda u: -u.T.dot(conjugate_arg) + u.T.dot(precision).dot(u)/2. \ + + np.log(1.+ 1./((con_offset - con_linear.dot(u))/ scaling)).sum() + grad = lambda u: -conjugate_arg + precision.dot(u) - con_linear.T.dot(1./(scaling + con_offset - con_linear.dot(u)) - + 1./(con_offset - con_linear.dot(u))) + barrier_hessian = lambda u: con_linear.T.dot(np.diag(-1./((scaling + con_offset-con_linear.dot(u))**2.) + + 1./((con_offset-con_linear.dot(u))**2.))).dot(con_linear) current = feasible_point current_value = np.inf @@ -665,7 +666,7 @@ def solve_barrier_affine_py(conjugate_arg, while True: count += 1 proposal = current - step * cur_grad - if np.all(con_offset - con_linear.dot(proposal) > 0): + if np.all(con_offset-con_linear.dot(proposal) > 0): break step *= 0.5 if count >= 40: @@ -681,7 +682,7 @@ def solve_barrier_affine_py(conjugate_arg, if proposed_value <= current_value: break step *= 0.5 - if count >= 20: + if count >= 1000: if not (np.isnan(proposed_value) or np.isnan(current_value)): break else: @@ -708,8 +709,3 @@ def solve_barrier_affine_py(conjugate_arg, - - - - - diff --git a/selectinf/randomized/tests/test_multitask_lasso.py b/selectinf/randomized/tests/test_multitask_lasso.py index 3e486e238..00ac95c23 100644 --- a/selectinf/randomized/tests/test_multitask_lasso.py +++ b/selectinf/randomized/tests/test_multitask_lasso.py @@ -18,30 +18,35 @@ def test_multitask_lasso_global(ntask=2, signal = np.sqrt(signal_fac * 2 * np.log(p)) - response_vars, predictor_vars, beta = gaussian_multitask_instance(ntask, - nsamples, - p, - global_sparsity, - task_sparsity, - sigma, - signal, - rhos)[:3] + response_vars, predictor_vars, beta, _gaussian_noise = gaussian_multitask_instance(ntask, + nsamples, + p, + global_sparsity, + task_sparsity, + sigma, + signal, + rhos, + random_signs=False, + equicorrelated=False)[:4] + + feature_weight = weight * np.ones(p) + + #sigmas_ = np.array([np.std(response_vars[i]) for i in range(ntask)]) + sigmas_ = sigma - feature_weight = weight * np.sqrt(2 * np.log(p)) * np.ones(p) - - sigmas_ = np.array([np.std(response_vars[i]) for i in range(ntask)]) - - randomizer_scales = 1. * sigmas_ + randomizer_scales = 0.71 * sigmas_ #ridge_terms = np.array([np.std(response_vars[i]) * np.sqrt(np.mean((predictor_vars[i] ** 2).sum(0)))/ np.sqrt(nsamples[i] - 1) # for i in range(ntask)]) + + _initial_omega = np.array([randomizer_scales[i] * _gaussian_noise[(i * p):((i + 1) * p)] for i in range(ntask)]).T multi_lasso = multi_task_lasso.gaussian(predictor_vars, response_vars, feature_weight, ridge_term=None, randomizer_scales=randomizer_scales) - multi_lasso.fit() + multi_lasso.fit(perturbations=_initial_omega) # dispersions = np.array([np.linalg.norm(response_vars[i] - # predictor_vars[i].dot(np.linalg.pinv(predictor_vars[i]).dot(response_vars[i]))) ** 2 / (nsamples[i] - p) @@ -62,8 +67,87 @@ def test_multitask_lasso_global(ntask=2, coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) - return coverage, intervals[:, 1]- intervals[:, 0] + pivot_ = ndist.cdf((estimate - beta_target) / np.sqrt(np.diag(observed_info_mean))) + pivot = 2 * np.minimum(pivot_, 1. - pivot_) + + return coverage, intervals[:, 1] - intervals[:, 0], pivot + +def test_multitask_lasso_naive_global(ntask=2, + nsamples=500 * np.ones(2), + p=100, + global_sparsity=.8, + task_sparsity=.3, + sigma=1. * np.ones(2), + signal_fac=0.5, + rhos=0. * np.ones(2), + weight=2.): + + nsamples = nsamples.astype(int) + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + while True: + response_vars, predictor_vars, beta = gaussian_multitask_instance(ntask, + nsamples, + p, + global_sparsity, + task_sparsity, + sigma, + signal, + rhos, + random_signs=True, + equicorrelated=False)[:3] + + feature_weight = weight * np.ones(p) + + sigmas_ = sigma + + perturbations = np.zeros((p, ntask)) + + multi_lasso = multi_task_lasso.gaussian(predictor_vars, + response_vars, + feature_weight, + ridge_term=None, + randomizer_scales=1. * sigmas_, + perturbations=perturbations) + active_signs = multi_lasso.fit() + + dispersions = sigma ** 2 + + estimate, observed_info_mean, _, _, intervals = multi_lasso.multitask_inference_global(dispersions=dispersions) + + coverage = [] + pivot = [] + + if (active_signs != 0).sum() > 0: + + beta_target_ = [] + observed_target_ = [] + tot_par = multi_lasso.active_global.shape[0] + + prec_target = np.zeros((tot_par, tot_par)) + for j in range(ntask): + beta_target_.extend(np.linalg.pinv((predictor_vars[j])[:, multi_lasso.active_global]).dot( + predictor_vars[j].dot(beta[:, j]))) + Qfeat = np.linalg.inv((predictor_vars[j])[:, multi_lasso.active_global].T.dot((predictor_vars[j])[:, multi_lasso.active_global])) + observed_target_.extend(np.linalg.pinv((predictor_vars[j])[:, multi_lasso.active_global]).dot(response_vars[j])) + prec_target += np.linalg.inv(Qfeat * dispersions[j]) + + beta_target_ = np.asarray(beta_target_) + beta_target = multi_lasso.W_coef.dot(beta_target_) + observed_target_ = np.asarray(observed_target_) + observed_target = multi_lasso.W_coef.dot(observed_target_) + cov_target = np.linalg.inv(prec_target) + + alpha = 1. - 0.90 + quantile = ndist.ppf(1 - alpha / 2.) + intervals = np.vstack([observed_target - quantile * np.sqrt(np.diag(cov_target)), + observed_target + quantile * np.sqrt(np.diag(cov_target))]).T + coverage.extend((beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1])) + pivot_ = ndist.cdf((observed_target - beta_target) / np.sqrt(np.diag(cov_target))) + pivot.extend(2 * np.minimum(pivot_, 1. - pivot_)) + + return np.asarray(coverage), intervals[:, 1] - intervals[:, 0], np.asarray(pivot) def test_multitask_lasso_hetero(ntask=2, nsamples=500 * np.ones(2), @@ -79,84 +163,190 @@ def test_multitask_lasso_hetero(ntask=2, signal = np.sqrt(signal_fac * 2 * np.log(p)) - response_vars, predictor_vars, beta = gaussian_multitask_instance(ntask, - nsamples, - p, - global_sparsity, - task_sparsity, - sigma, - signal, - rhos)[:3] + while True: - feature_weight = weight * np.sqrt(2 * np.log(p)) * np.ones(p) + response_vars, predictor_vars, beta, _gaussian_noise = gaussian_multitask_instance(ntask, + nsamples, + p, + global_sparsity, + task_sparsity, + sigma, + signal, + rhos, + random_signs=True, + equicorrelated=False)[:4] - sigmas_ = np.array([np.std(response_vars[i]) for i in range(ntask)]) - #sigmas_ = sigma - randomizer_scales = 0.71 * sigmas_ - multi_lasso = multi_task_lasso.gaussian(predictor_vars, - response_vars, - feature_weight, - ridge_term=None, - randomizer_scales=randomizer_scales) - active_signs = multi_lasso.fit() + feature_weight = weight * np.ones(p) - dispersions = sigma ** 2 + sigmas_ = sigma + randomizer_scales = 0.71 * np.array([sigmas_[i] for i in range(ntask)]) - estimate, observed_info_mean, _, _, intervals = multi_lasso.multitask_inference_hetero(dispersions=dispersions) + _initial_omega = np.array([randomizer_scales[i] * _gaussian_noise[(i * p):((i + 1) * p)] for i in range(ntask)]).T - beta_target = [] + multi_lasso = multi_task_lasso.gaussian(predictor_vars, + response_vars, + feature_weight, + ridge_term=None, + randomizer_scales=randomizer_scales, + perturbations=None) - for j in range(ntask): - beta_target.extend(np.linalg.pinv((predictor_vars[j])[:, (active_signs[:, j] != 0)]).dot(predictor_vars[j].dot(beta[:, j]))) + active_signs = multi_lasso.fit(perturbations=_initial_omega) - beta_target = np.asarray(beta_target) + if (active_signs != 0).sum()>0: - coverage = (beta_target > intervals[:, 0]) * (beta_target < - intervals[:, 1]) + dispersions = sigma ** 2 + + estimate, observed_info_mean, Z_scores, pvalues, intervals = multi_lasso.multitask_inference_hetero(dispersions=dispersions) + + beta_target = [] - return coverage, intervals[:, 1] - intervals[:, 0] + for i in range(ntask): + X, y = multi_lasso.loglikes[i].data + beta_target.extend(np.linalg.pinv(X[:, (active_signs[:, i] != 0)]).dot(X.dot(beta[:, i]))) + beta_target = np.asarray(beta_target) + pivot_ = ndist.cdf((estimate - beta_target)/np.sqrt(np.diag(observed_info_mean))) + pivot = 2 * np.minimum(pivot_, 1. - pivot_) + + coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) + + return coverage, intervals[:, 1] - intervals[:, 0], pivot + + +def test_multitask_lasso_naive_hetero(ntask=2, + nsamples=500 * np.ones(2), + p=100, + global_sparsity=.8, + task_sparsity=.3, + sigma=1. * np.ones(2), + signal_fac=0.5, + rhos=0. * np.ones(2), + weight=2.): + + nsamples = nsamples.astype(int) + signal = np.sqrt(signal_fac * 2 * np.log(p)) + + while True: + response_vars, predictor_vars, beta = gaussian_multitask_instance(ntask, + nsamples, + p, + global_sparsity, + task_sparsity, + sigma, + signal, + rhos, + random_signs=True, + equicorrelated=False)[:3] + + feature_weight = weight * np.ones(p) + + sigmas_ = sigma + + perturbations = np.zeros((p, ntask)) + + multi_lasso = multi_task_lasso.gaussian(predictor_vars, + response_vars, + feature_weight, + ridge_term=None, + randomizer_scales=1. * sigmas_, + perturbations=perturbations) + active_signs = multi_lasso.fit() + + dispersions = sigma ** 2 + + coverage = [] + pivot = [] + + if (active_signs != 0).sum() > 0: + + for i in range(ntask): + X, y = multi_lasso.loglikes[i].data + beta_target = np.linalg.pinv(X[:, (active_signs[:, i] != 0)]).dot(X.dot(beta[:, i])) + Qfeat = np.linalg.inv(X[:, (active_signs[:, i] != 0)].T.dot(X[:, (active_signs[:, i] != 0)])) + observed_target = np.linalg.pinv(X[:, (active_signs[:, i] != 0)]).dot(y) + cov_target = Qfeat * dispersions[i] + alpha = 1. - 0.90 + quantile = ndist.ppf(1 - alpha / 2.) + intervals = np.vstack([observed_target - quantile * np.sqrt(np.diag(cov_target)), + observed_target + quantile * np.sqrt(np.diag(cov_target))]).T + coverage.extend((beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1])) + pivot_ = ndist.cdf((observed_target - beta_target) / np.sqrt(np.diag(cov_target))) + pivot.extend(2 * np.minimum(pivot_, 1. - pivot_)) + + return np.asarray(coverage), intervals[:, 1] - intervals[:, 0], np.asarray(pivot) + + + +import matplotlib.pyplot as plt +from statsmodels.distributions.empirical_distribution import ECDF def test_coverage(nsim=100): cov = [] len = [] + pivots = [] for n in range(nsim): ntask = 5 - # coverage, length = test_multitask_lasso_global(ntask=ntask, - # nsamples=500 * np.ones(ntask), - # p=50, - # global_sparsity=.95, - # task_sparsity=0.50, - # sigma=1 * np.ones(ntask), - # signal_fac=0.2, - # rhos=0.30 * np.ones(ntask), - # weight=1.) - - coverage, length = test_multitask_lasso_hetero(ntask=ntask, - nsamples=3000 * np.ones(ntask), - p=200, - global_sparsity=.90, - task_sparsity=0.50, - sigma=1.*np.ones(ntask), - signal_fac=np.array([0.2, 0.5]), - rhos=0.30*np.ones(ntask), - weight=1.) - - if coverage.sum() <= 0.10: - break + coverage, length, pivot = test_multitask_lasso_global(ntask=ntask, + nsamples=1000 * np.ones(ntask), + p=50, + global_sparsity=0.95, + task_sparsity=0.20, + sigma=1. * np.ones(ntask), + signal_fac=0.5, + rhos=0.50 * np.ones(ntask), + weight=1.) + + # coverage, length, pivot = test_multitask_lasso_naive_global(ntask=ntask, + # nsamples=1000 * np.ones(ntask), + # p=50, + # global_sparsity=0.95, + # task_sparsity=0.20, + # sigma=1. * np.ones(ntask), + # signal_fac=1., + # rhos=0.50 * np.ones(ntask), + # weight=1.) + + # coverage, length, pivot = test_multitask_lasso_hetero(ntask=ntask, + # nsamples=1000 * np.ones(ntask), + # p=50, + # global_sparsity=0.95, + # task_sparsity=0.20, + # sigma=1. * np.ones(ntask), + # signal_fac=np.array([1., 5.]), + # rhos=0.50 * np.ones(ntask), + # weight=1.) + + # coverage, length, pivot = test_multitask_lasso_naive_hetero(ntask=ntask, + # nsamples=1000 * np.ones(ntask), + # p=50, + # global_sparsity=0.95, + # task_sparsity=0.20, + # sigma=1. * np.ones(ntask), + # signal_fac=np.array([1., 5.]), + # rhos=0.50 * np.ones(ntask), + # weight=1.) + cov.extend(coverage) len.extend(length) + pivots.extend(pivot) print("iteration completed ", n) print("coverage so far ", np.mean(np.asarray(cov))) print("length so far ", np.mean(np.asarray(length))) + plt.clf() + ecdf_MLE = ECDF(np.asarray(pivots)) + grid = np.linspace(0, 1, 101) + plt.plot(grid, ecdf_MLE(grid), c='blue', marker='^') + plt.plot(grid, grid, 'k--') + plt.show() + if __name__ == "__main__": - test_coverage(nsim=100) \ No newline at end of file + test_coverage(nsim=20) \ No newline at end of file diff --git a/selectinf/randomized/tests/test_multitask_lasso_pivot.py b/selectinf/randomized/tests/test_multitask_lasso_pivot.py new file mode 100644 index 000000000..e69de29bb diff --git a/selectinf/tests/instance.py b/selectinf/tests/instance.py index 85ab5d43a..aca0de109 100644 --- a/selectinf/tests/instance.py +++ b/selectinf/tests/instance.py @@ -387,13 +387,11 @@ def gaussian_multitask_instance(ntask, df=np.inf, scale=True, center=True, - equicorrelated=True): + equicorrelated=False): predictor_vars= {i: _design(nsamples[i], p, rhos[i], equicorrelated)[0] for i in range(ntask)} - cov_matrices = [_design(nsamples[i], p, rhos[i], equicorrelated)[1] for i in range(ntask)] - if center: predictor_vars = {i: predictor_vars[i]-predictor_vars[i].mean(0)[None, :] for i in range(ntask)} @@ -411,23 +409,22 @@ def gaussian_multitask_instance(ntask, nsignal = int(round(global_sparsity * p)) global_nulls = np.random.choice(p, nsignal, replace=False) beta[global_nulls, :] = np.zeros((ntask,)) - non_nulls = np.setdiff1d(np.arange(p), global_nulls) - for j in range(ntask): - beta[non_nulls, j] = np.linspace(float(signal[0]), float(signal[1]), num=p-nsignal) for i in np.delete(range(p), global_nulls): - beta[i, np.random.choice(ntask, int(round(task_sparsity * ntask)))] = 0. + null_positions = np.random.choice(ntask, int(round(task_sparsity * ntask)), replace=False) + beta[i, null_positions] = 0. + non_null_positions = np.setdiff1d(np.arange(ntask), null_positions) + beta[i, non_null_positions] = np.linspace(float(signal[0]), float(signal[1]), num=ntask-null_positions.shape[0]) if random_signs: beta *= (2 * np.random.binomial(1, 0.5, size=(p,ntask)) - 1.) - beta /= [np.sqrt(nsamples[i]) for i in range(len(nsamples))] + beta /= np.sqrt(nsamples) if scale: scalings = {i: predictor_vars[i].std(0) * np.sqrt(nsamples[i]) for i in range(ntask)} - predictor_vars = {i: predictor_vars[i]/scalings[i][None, :] for i in range(ntask)} - beta *= [np.sqrt(nsamples[i]) for i in range(len(nsamples))] - cov_matrices = {i: cov_matrices[i] / np.multiply.outer(scalings[i], scalings[i]) for i in range(ntask)} + predictor_vars = {i: predictor_vars[i]/(scalings[i][None, :]) for i in range(ntask)} + beta *= np.sqrt(nsamples) active = np.zeros((p, ntask), np.bool) active[beta != 0] = True @@ -440,6 +437,13 @@ def _noise(n, df=np.inf): sd_t = np.std(tdist.rvs(df, size=50000)) return tdist.rvs(df, size=n) / sd_t - response_vars = {i: (predictor_vars[i].dot(beta[:, i]) + _noise(nsamples[i], df)) * sigma[i] for i in range(ntask)} + gaussian_noise = _noise(nsamples.sum() + p*ntask, df) + response_vars = {} + nsamples_cumsum = np.cumsum(nsamples) + for i in range(ntask): + if i == 0: + response_vars[i] = (predictor_vars[i].dot(beta[:, i]) + gaussian_noise[:nsamples_cumsum[i]]) * sigma[i] + else: + response_vars[i] = (predictor_vars[i].dot(beta[:, i]) + gaussian_noise[nsamples_cumsum[i-1]:nsamples_cumsum[i]]) * sigma[i] - return response_vars, predictor_vars, beta * sigma, np.nonzero(active), sigma, cov_matrices \ No newline at end of file + return response_vars, predictor_vars, beta * sigma, gaussian_noise[nsamples_cumsum[-1]:], np.nonzero(active), sigma \ No newline at end of file From 25a22fc4ee6fa38ece56cc0e287298a52b51d960 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Wed, 3 Feb 2021 08:38:54 -0500 Subject: [PATCH 25/26] commit before switch --- selectinf/randomized/query.py | 5 +- .../tests/test_multitask_lasso_pivot.py | 0 .../tests/test_selective_MLE_high.py | 54 ++++++++++++------- 3 files changed, 38 insertions(+), 21 deletions(-) delete mode 100644 selectinf/randomized/tests/test_multitask_lasso_pivot.py diff --git a/selectinf/randomized/query.py b/selectinf/randomized/query.py index 7284b55f6..ce31b65d1 100644 --- a/selectinf/randomized/query.py +++ b/selectinf/randomized/query.py @@ -1533,6 +1533,7 @@ def _solve_barrier_affine_py(conjugate_arg, if itercount % 4 == 0: step *= 2 + print("NSTEP + STEP", itercount, step, tol) hess = np.linalg.inv(precision + barrier_hessian(current)) return current_value, current, hess @@ -1680,12 +1681,12 @@ def selective_MLE(observed_target, prec_opt = np.linalg.inv(cond_cov) conjugate_arg = prec_opt.dot(cond_mean) - + useC = False if useC: solver = solve_barrier_affine_C else: solver = _solve_barrier_affine_py - + print("check ", useC) val, soln, hess = solver(conjugate_arg, prec_opt, init_soln, diff --git a/selectinf/randomized/tests/test_multitask_lasso_pivot.py b/selectinf/randomized/tests/test_multitask_lasso_pivot.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/selectinf/randomized/tests/test_selective_MLE_high.py b/selectinf/randomized/tests/test_selective_MLE_high.py index 4770dc450..8e2ef9cc6 100644 --- a/selectinf/randomized/tests/test_selective_MLE_high.py +++ b/selectinf/randomized/tests/test_selective_MLE_high.py @@ -83,10 +83,10 @@ def test_full_targets(n=200, def test_selected_targets(n=2000, p=200, - signal_fac=10., + signal_fac=0.5, s=5, sigma=3, - rho=0.4, + rho=0.2, randomizer_scale=1, full_dispersion=True): """ @@ -114,6 +114,7 @@ def test_selected_targets(n=2000, sigma_ = np.std(Y) W = np.ones(X.shape[1]) * np.sqrt(2 * np.log(p)) * sigma_ + #W = np.append(np.ones(10) * 0.8* np.sqrt(2 * np.log(p)) * sigma_, np.ones(90) * np.sqrt(2 * np.log(p)) * sigma_ * (10 ** 10)) conv = const(X, Y, @@ -142,19 +143,23 @@ def test_selected_targets(n=2000, cov_target_score)[0] estimate = result['MLE'] pval = result['pvalue'] + se = result['SE'] intervals = np.asarray(result[['lower_confidence', 'upper_confidence']]) beta_target = np.linalg.pinv(X[:, nonzero]).dot(X.dot(beta)) + pivot = (estimate - beta_target) / se coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) - return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals + return pval[beta[nonzero] == 0], pval[beta[nonzero] != 0], coverage, intervals, pivot +import matplotlib.pyplot as plt +import seaborn as sns def main(nsim=500, full=False): - P0, PA, cover, length_int = [], [], [], [] + P0, PA, cover, length_int, pivot = [], [], [], [], [] from statsmodels.distributions import ECDF - n, p, s = 500, 100, 5 + n, p, s = 500, 100, 10 for i in range(nsim): if full: @@ -166,16 +171,29 @@ def main(nsim=500, full=False): avg_length = intervals[:, 1] - intervals[:, 0] else: full_dispersion = True - p0, pA, cover_, intervals = test_selected_targets(n=n, p=p, s=s, + p0, pA, cover_, intervals, pivot_ = test_selected_targets(n=n, p=p, s=s, full_dispersion=full_dispersion) avg_length = intervals[:, 1] - intervals[:, 0] cover.extend(cover_) + pivot.extend(pivot_) P0.extend(p0) PA.extend(pA) - print( - np.array(PA) < 0.1, np.mean(P0), np.std(P0), np.mean(np.array(P0) < 0.1), np.mean(np.array(PA) < 0.1), np.mean(cover), - np.mean(avg_length), 'null pvalue + power + length') + print(np.mean(cover), np.mean(avg_length), 'coverage + length') + # print( + # np.array(PA) < 0.1, np.mean(P0), np.std(P0), np.mean(np.array(P0) < 0.1), np.mean(np.array(PA) < 0.1), np.mean(cover), + # np.mean(avg_length), 'null pvalue + power + length') + + sns.distplot(np.asarray(pivot)) + plt.show() + +if __name__ == "__main__": + main(nsim=500) + + + + + def test_instance(): @@ -218,14 +236,12 @@ def test_instance(): return coverage -def main(nsim=500): - - cover = [] - for i in range(nsim): +# def main(nsim=500): +# +# cover = [] +# for i in range(nsim): +# +# cover_ = test_instance() +# cover.extend(cover_) +# print(np.mean(cover), 'coverage so far ') - cover_ = test_instance() - cover.extend(cover_) - print(np.mean(cover), 'coverage so far ') - -if __name__ == "__main__": - main(nsim=1) From dd059d4f59f09b9cf9933e1e47493ecfd837c898 Mon Sep 17 00:00:00 2001 From: Snigdha Panigrahi Date: Fri, 30 Apr 2021 11:26:38 -0400 Subject: [PATCH 26/26] clean up + bug fixes --- selectinf/randomized/multitask_lasso.py | 224 ++----------- .../randomized/tests/test_multitask_lasso.py | 317 ++++++------------ 2 files changed, 133 insertions(+), 408 deletions(-) diff --git a/selectinf/randomized/multitask_lasso.py b/selectinf/randomized/multitask_lasso.py index 67f8a28af..d0b3af683 100644 --- a/selectinf/randomized/multitask_lasso.py +++ b/selectinf/randomized/multitask_lasso.py @@ -254,6 +254,8 @@ def _setup_implied_gaussian(self): self.cond_cov = cond_cov self.cond_precision = cond_precision self.logdens_linear = logdens_linear + self.score_offset = self.observed_score_states + self.opt_offsets + self.precs = precs return cond_mean, cond_cov, cond_precision, logdens_linear @@ -272,9 +274,32 @@ def multitask_inference_hetero(self, cond_precision = self.cond_precision logdens_linear = self.logdens_linear - target_lin = -logdens_linear.dot(cov_target_score.T.dot(prec_target)) - prec_opt = cond_precision + + ntarget = prec_target.shape[0] + nopt = prec_opt.shape[0] + + target_linear = cov_target_score.T.dot(prec_target) + target_offset = self.score_offset - cov_target_score.T.dot(prec_target).dot(observed_target) + target_lin = - logdens_linear.dot(target_linear) + + implied_precision = np.zeros((ntarget + nopt, ntarget + nopt)) + + implied_precision[:ntarget][:, :ntarget] = prec_target + (target_linear.T.dot(self.precs).dot(target_linear)) + + implied_precision[ntarget:][:, :ntarget] = -prec_opt.dot(target_lin) + + implied_precision[:ntarget][:, ntarget:] = implied_precision[ntarget:][:, :ntarget].T + + implied_precision[ntarget:][:, ntarget:] = prec_opt + + implied_cov = np.linalg.inv(implied_precision) + + _Q = prec_opt.dot(logdens_linear).dot(target_offset) + _P = (target_linear.T.dot(self.precs).dot(target_offset)) + _prec = np.linalg.inv(implied_cov[:ntarget][:, :ntarget]) + C = cov_target.dot(_P + _prec.dot(implied_cov[:ntarget][:, ntarget:]).dot(_Q)) + conjugate_arg = prec_opt.dot(cond_mean) val, soln, hess = solve_barrier_affine_py(conjugate_arg, @@ -287,10 +312,11 @@ def multitask_inference_hetero(self, min_its=5000, tol=1.e-12) - final_estimator = observed_target + cov_target.dot(target_lin.T.dot(prec_opt.dot(cond_mean - soln))) + final_estimator = cov_target.dot(_prec).dot(observed_target) \ + + cov_target.dot(target_lin.T.dot(prec_opt.dot(cond_mean - soln))) + C L = target_lin.T.dot(prec_opt) - observed_info_natural = prec_target + L.dot(target_lin) - L.dot(hess.dot(L.T)) + observed_info_natural = _prec + L.dot(target_lin) - L.dot(hess.dot(L.T)) observed_info_mean = cov_target.dot(observed_info_natural.dot(cov_target)) Z_scores = final_estimator / np.sqrt(np.diag(observed_info_mean)) @@ -308,7 +334,6 @@ def multitask_target_hetero(self, dispersions=None): observed_targets = [] - #ancillary_stats = [] cov_targets = np.array([]) crosscov_target_scores = np.array([]) @@ -361,194 +386,6 @@ def _solve_randomized_problem(self, return initial_solns, initial_subgrads - def multitask_inference_global(self, - dispersions=None, - step=1, - nstep=1000, - min_its=200, - tol=1.e-8, - level=0.9): - self._set_marginal_parameters(dispersions=dispersions) - - current = self.initial_estimate_mle - current_value = np.inf - - log_lik_current, grad_lik_current, hess_lik_current = self.multitask_likelihood_global(current) - - for itercount in range(nstep): - newton_step = -grad_lik_current - - count = 0 - - while True: - count += 1 - proposal = current - step * newton_step - - log_lik_proposal, grad_lik_proposal, hess_lik_proposal = self.multitask_likelihood_global(proposal) - proposed_value = -log_lik_proposal - - if proposed_value <= current_value: - break - step *= 0.5 - - if count >= 20: - if not (np.isnan(proposed_value) or np.isnan(current_value)): - break - else: - raise ValueError('value is NaN: %f, %f' % (proposed_value, current_value)) - - if np.fabs(current_value - proposed_value) < tol * np.fabs(current_value) and itercount >= min_its: - current = proposal - current_value = proposed_value - - grad_lik_current = grad_lik_proposal - hess_lik_current = hess_lik_proposal - break - - current = proposal - current_value = proposed_value - grad_lik_current = grad_lik_proposal - hess_lik_current = hess_lik_proposal - - if itercount % 4 == 0: - step *= 2 - - final_estimator = current - observed_info_mean = np.linalg.inv(-hess_lik_current) - - Z_scores = final_estimator / np.sqrt(np.diag(observed_info_mean)) - pvalues = ndist.cdf(Z_scores) - pvalues = 2 * np.minimum(pvalues, 1 - pvalues) - - alpha = 1. - level - quantile = ndist.ppf(1 - alpha / 2.) - intervals = np.vstack([final_estimator - quantile * np.sqrt(np.diag(observed_info_mean)), - final_estimator + quantile * np.sqrt(np.diag(observed_info_mean))]).T - - return final_estimator, observed_info_mean, Z_scores, pvalues, intervals - - - def multitask_target_global(self, - dispersions=None, - solve_args={'tol': 1.e-12, 'min_its': 50}): - observed_targets = [] - cov_targets = np.array([]) - crosscov_target_scores = np.array([]) - - features = self.active_global - - for i in range(self.ntask): - - X, y = self.loglikes[i].data - n, p = X.shape - W = self.loglikes[i].saturated_loss.hessian(X.dot(self.beta_bar[:, i])) - - Xfeat = X[:, features] - Qfeat = Xfeat.T.dot(W[:, None] * Xfeat) - - observed_target = restricted_estimator(self.loglikes[i], features, solve_args=solve_args) - observed_targets.extend(observed_target) - - cov_target = np.linalg.inv(Qfeat) - _score_linear = -Xfeat.T.dot(W[:, None] * X).T - - crosscov_target_score = _score_linear.dot(cov_target) - - if dispersions is None: - dispersion = ((y - self.loglikes[i].saturated_loss.mean_function( - Xfeat.dot(observed_target))) ** 2 / W).sum() / (n - Xfeat.shape[1]) - else: - dispersion = dispersions[i] - - crosscov_target_scores = block_diag(crosscov_target_scores, crosscov_target_score.T * dispersion) - cov_targets = block_diag(cov_targets, cov_target * dispersion) - - return (np.asarray(observed_targets), - cov_targets[1:, :], - crosscov_target_scores[1:, :]) - - def _set_marginal_parameters(self, - dispersions=None): - observed_target, cov_target, cov_target_score = self.multitask_target_global(dispersions=dispersions) - ntarget = observed_target.shape[0] - - prec_target = np.linalg.inv(cov_target) - - cond_mean, cond_cov, cond_precision, logdens_linear = self._setup_implied_gaussian() - nopt = cond_cov.shape[0] - - target_linear = -logdens_linear.dot(cov_target_score.T.dot(prec_target)) - - implied_precision = np.zeros((ntarget + nopt, ntarget + nopt)) - implied_precision[:ntarget][:, :ntarget] = (prec_target + target_linear.T.dot(cond_precision.dot(target_linear))) - implied_precision[:ntarget][:, ntarget:] = -target_linear.T.dot(cond_precision) - implied_precision[ntarget:][:, :ntarget] = (-target_linear.T.dot(cond_precision)).T - - implied_precision[ntarget:][:, ntarget:] = cond_precision - - implied_cov = np.linalg.inv(implied_precision) - - self.linear_coef = implied_cov[ntarget:][:, :ntarget].dot(prec_target) - - target_offset = cond_mean - target_linear.dot(observed_target) - M = implied_cov[ntarget:][:, ntarget:].dot(cond_precision.dot(target_offset)) - N = -target_linear.T.dot(cond_precision).dot(target_offset) - - self.offset_coef = implied_cov[ntarget:][:, :ntarget].dot(N) + M - - self.cov_marginal = implied_cov[ntarget:][:, ntarget:] - self.prec_marginal = np.linalg.inv(self.cov_marginal) - - self.observed_target = observed_target - self.prec_target = prec_target - - D = np.identity(self.active_global.shape[0]) - for i in range(self.ntask - 1): - D = np.block([[D], [np.identity(self.active_global.shape[0])]]) - - self.W_coef = np.linalg.inv(D.T.dot(self.prec_target).dot(D)).dot(D.T.dot(self.prec_target)) - - self.initial_estimate_mle = self.W_coef.dot(self.observed_target) - - self.D = D - - - def multitask_likelihood_global(self, - target_parameter): - ##this is the O problem - - par = self.D.dot(target_parameter) - mean_marginal = self.linear_coef.dot(par) + self.offset_coef - prec_marginal = self.prec_marginal - conjugate_marginal = prec_marginal.dot(mean_marginal) - - val, soln, hess = solve_barrier_affine_py(conjugate_marginal, - prec_marginal, - self.observed_opt_states, - self.linear_con, - self.offset_con, - step=1, - nstep=5000, - min_its=1000, - tol=1.e-12) - - log_normalizer = -val - mean_marginal.T.dot(prec_marginal).dot(mean_marginal) / 2. - - ## based on the O problem, we now write the log-likelihood, its gradient and hessian - - log_lik = -( - (self.observed_target - par).T.dot(self.prec_target).dot(self.observed_target - par)) / 2. - log_normalizer - - grad_lik = self.D.T.dot(self.prec_target.dot(self.observed_target) - - self.prec_target.dot(par) - self.linear_coef.T.dot( - prec_marginal.dot(soln) - conjugate_marginal)) - - hess_lik = self.D.T.dot(-self.prec_target + self.linear_coef.T.dot(prec_marginal).dot(self.linear_coef) - - self.linear_coef.T.dot(prec_marginal).dot(hess).dot( - prec_marginal.dot(self.linear_coef))).dot(self.D) - - return log_lik, grad_lik, hess_lik - def _solve_multitask_problem(self, perturbations=None, num_iter=1000, atol=1.e-5): if perturbations is not None: @@ -702,6 +539,7 @@ def solve_barrier_affine_py(conjugate_arg, step *= 2 hess = np.linalg.inv(precision + barrier_hessian(current)) + return current_value, current, hess diff --git a/selectinf/randomized/tests/test_multitask_lasso.py b/selectinf/randomized/tests/test_multitask_lasso.py index 00ac95c23..6910afb55 100644 --- a/selectinf/randomized/tests/test_multitask_lasso.py +++ b/selectinf/randomized/tests/test_multitask_lasso.py @@ -4,151 +4,6 @@ from ..multitask_lasso import multi_task_lasso from ...tests.instance import gaussian_multitask_instance -def test_multitask_lasso_global(ntask=2, - nsamples=500 * np.ones(2), - p=100, - global_sparsity=.8, - task_sparsity=.3, - sigma=1.*np.ones(2), - signal_fac= 0.5, - rhos=0.*np.ones(2), - weight=2.): - - nsamples = nsamples.astype(int) - - signal = np.sqrt(signal_fac * 2 * np.log(p)) - - response_vars, predictor_vars, beta, _gaussian_noise = gaussian_multitask_instance(ntask, - nsamples, - p, - global_sparsity, - task_sparsity, - sigma, - signal, - rhos, - random_signs=False, - equicorrelated=False)[:4] - - feature_weight = weight * np.ones(p) - - #sigmas_ = np.array([np.std(response_vars[i]) for i in range(ntask)]) - sigmas_ = sigma - - randomizer_scales = 0.71 * sigmas_ - - #ridge_terms = np.array([np.std(response_vars[i]) * np.sqrt(np.mean((predictor_vars[i] ** 2).sum(0)))/ np.sqrt(nsamples[i] - 1) - # for i in range(ntask)]) - - _initial_omega = np.array([randomizer_scales[i] * _gaussian_noise[(i * p):((i + 1) * p)] for i in range(ntask)]).T - - multi_lasso = multi_task_lasso.gaussian(predictor_vars, - response_vars, - feature_weight, - ridge_term=None, - randomizer_scales=randomizer_scales) - multi_lasso.fit(perturbations=_initial_omega) - - # dispersions = np.array([np.linalg.norm(response_vars[i] - - # predictor_vars[i].dot(np.linalg.pinv(predictor_vars[i]).dot(response_vars[i]))) ** 2 / (nsamples[i] - p) - # for i in range(ntask)]) - - dispersions = sigma ** 2 - - estimate, observed_info_mean, _, _, intervals = multi_lasso.multitask_inference_global(dispersions=dispersions) - - beta_target_ = [] - - for j in range(ntask): - beta_target_.extend(np.linalg.pinv((predictor_vars[j])[:, multi_lasso.active_global]).dot(predictor_vars[j].dot(beta[:,j]))) - - beta_target_ = np.asarray(beta_target_) - beta_target = multi_lasso.W_coef.dot(beta_target_) - - coverage = (beta_target > intervals[:, 0]) * (beta_target < - intervals[:, 1]) - - pivot_ = ndist.cdf((estimate - beta_target) / np.sqrt(np.diag(observed_info_mean))) - pivot = 2 * np.minimum(pivot_, 1. - pivot_) - - return coverage, intervals[:, 1] - intervals[:, 0], pivot - - -def test_multitask_lasso_naive_global(ntask=2, - nsamples=500 * np.ones(2), - p=100, - global_sparsity=.8, - task_sparsity=.3, - sigma=1. * np.ones(2), - signal_fac=0.5, - rhos=0. * np.ones(2), - weight=2.): - - nsamples = nsamples.astype(int) - signal = np.sqrt(signal_fac * 2 * np.log(p)) - - while True: - response_vars, predictor_vars, beta = gaussian_multitask_instance(ntask, - nsamples, - p, - global_sparsity, - task_sparsity, - sigma, - signal, - rhos, - random_signs=True, - equicorrelated=False)[:3] - - feature_weight = weight * np.ones(p) - - sigmas_ = sigma - - perturbations = np.zeros((p, ntask)) - - multi_lasso = multi_task_lasso.gaussian(predictor_vars, - response_vars, - feature_weight, - ridge_term=None, - randomizer_scales=1. * sigmas_, - perturbations=perturbations) - active_signs = multi_lasso.fit() - - dispersions = sigma ** 2 - - estimate, observed_info_mean, _, _, intervals = multi_lasso.multitask_inference_global(dispersions=dispersions) - - coverage = [] - pivot = [] - - if (active_signs != 0).sum() > 0: - - beta_target_ = [] - observed_target_ = [] - tot_par = multi_lasso.active_global.shape[0] - - prec_target = np.zeros((tot_par, tot_par)) - for j in range(ntask): - beta_target_.extend(np.linalg.pinv((predictor_vars[j])[:, multi_lasso.active_global]).dot( - predictor_vars[j].dot(beta[:, j]))) - Qfeat = np.linalg.inv((predictor_vars[j])[:, multi_lasso.active_global].T.dot((predictor_vars[j])[:, multi_lasso.active_global])) - observed_target_.extend(np.linalg.pinv((predictor_vars[j])[:, multi_lasso.active_global]).dot(response_vars[j])) - prec_target += np.linalg.inv(Qfeat * dispersions[j]) - - beta_target_ = np.asarray(beta_target_) - beta_target = multi_lasso.W_coef.dot(beta_target_) - observed_target_ = np.asarray(observed_target_) - observed_target = multi_lasso.W_coef.dot(observed_target_) - cov_target = np.linalg.inv(prec_target) - - alpha = 1. - 0.90 - quantile = ndist.ppf(1 - alpha / 2.) - intervals = np.vstack([observed_target - quantile * np.sqrt(np.diag(cov_target)), - observed_target + quantile * np.sqrt(np.diag(cov_target))]).T - coverage.extend((beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1])) - pivot_ = ndist.cdf((observed_target - beta_target) / np.sqrt(np.diag(cov_target))) - pivot.extend(2 * np.minimum(pivot_, 1. - pivot_)) - - return np.asarray(coverage), intervals[:, 1] - intervals[:, 0], np.asarray(pivot) - def test_multitask_lasso_hetero(ntask=2, nsamples=500 * np.ones(2), p=100, @@ -157,7 +12,8 @@ def test_multitask_lasso_hetero(ntask=2, sigma=1. * np.ones(2), signal_fac=0.5, rhos=0. * np.ones(2), - weight=2.): + weight=2., + randomizer_variation = 1.): nsamples = nsamples.astype(int) @@ -176,7 +32,6 @@ def test_multitask_lasso_hetero(ntask=2, random_signs=True, equicorrelated=False)[:4] - feature_weight = weight * np.ones(p) sigmas_ = sigma @@ -193,7 +48,7 @@ def test_multitask_lasso_hetero(ntask=2, active_signs = multi_lasso.fit(perturbations=_initial_omega) - if (active_signs != 0).sum()>0: + if (active_signs != 0).sum() > 0: dispersions = sigma ** 2 @@ -206,7 +61,7 @@ def test_multitask_lasso_hetero(ntask=2, beta_target.extend(np.linalg.pinv(X[:, (active_signs[:, i] != 0)]).dot(X.dot(beta[:, i]))) beta_target = np.asarray(beta_target) - pivot_ = ndist.cdf((estimate - beta_target)/np.sqrt(np.diag(observed_info_mean))) + pivot_ = ndist.cdf((estimate - beta_target) / np.sqrt(np.diag(observed_info_mean))) pivot = 2 * np.minimum(pivot_, 1. - pivot_) coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1]) @@ -261,91 +116,123 @@ def test_multitask_lasso_naive_hetero(ntask=2, if (active_signs != 0).sum() > 0: for i in range(ntask): + X, y = multi_lasso.loglikes[i].data beta_target = np.linalg.pinv(X[:, (active_signs[:, i] != 0)]).dot(X.dot(beta[:, i])) Qfeat = np.linalg.inv(X[:, (active_signs[:, i] != 0)].T.dot(X[:, (active_signs[:, i] != 0)])) observed_target = np.linalg.pinv(X[:, (active_signs[:, i] != 0)]).dot(y) cov_target = Qfeat * dispersions[i] + alpha = 1. - 0.90 quantile = ndist.ppf(1 - alpha / 2.) + intervals = np.vstack([observed_target - quantile * np.sqrt(np.diag(cov_target)), observed_target + quantile * np.sqrt(np.diag(cov_target))]).T + coverage.extend((beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1])) + pivot_ = ndist.cdf((observed_target - beta_target) / np.sqrt(np.diag(cov_target))) pivot.extend(2 * np.minimum(pivot_, 1. - pivot_)) return np.asarray(coverage), intervals[:, 1] - intervals[:, 0], np.asarray(pivot) - +import matplotlib as mpl +mpl.use('tkagg') import matplotlib.pyplot as plt from statsmodels.distributions.empirical_distribution import ECDF -def test_coverage(nsim=100): - - cov = [] - len = [] - pivots = [] - - for n in range(nsim): - - ntask = 5 - - coverage, length, pivot = test_multitask_lasso_global(ntask=ntask, - nsamples=1000 * np.ones(ntask), - p=50, - global_sparsity=0.95, - task_sparsity=0.20, - sigma=1. * np.ones(ntask), - signal_fac=0.5, - rhos=0.50 * np.ones(ntask), - weight=1.) - - # coverage, length, pivot = test_multitask_lasso_naive_global(ntask=ntask, - # nsamples=1000 * np.ones(ntask), - # p=50, - # global_sparsity=0.95, - # task_sparsity=0.20, - # sigma=1. * np.ones(ntask), - # signal_fac=1., - # rhos=0.50 * np.ones(ntask), - # weight=1.) - - # coverage, length, pivot = test_multitask_lasso_hetero(ntask=ntask, - # nsamples=1000 * np.ones(ntask), - # p=50, - # global_sparsity=0.95, - # task_sparsity=0.20, - # sigma=1. * np.ones(ntask), - # signal_fac=np.array([1., 5.]), - # rhos=0.50 * np.ones(ntask), - # weight=1.) - - # coverage, length, pivot = test_multitask_lasso_naive_hetero(ntask=ntask, - # nsamples=1000 * np.ones(ntask), - # p=50, - # global_sparsity=0.95, - # task_sparsity=0.20, - # sigma=1. * np.ones(ntask), - # signal_fac=np.array([1., 5.]), - # rhos=0.50 * np.ones(ntask), - # weight=1.) - - - cov.extend(coverage) - len.extend(length) - pivots.extend(pivot) - - print("iteration completed ", n) - print("coverage so far ", np.mean(np.asarray(cov))) - print("length so far ", np.mean(np.asarray(length))) - - plt.clf() - ecdf_MLE = ECDF(np.asarray(pivots)) - grid = np.linspace(0, 1, 101) - plt.plot(grid, ecdf_MLE(grid), c='blue', marker='^') - plt.plot(grid, grid, 'k--') - plt.show() +def test_coverage(nsim=100, coverage=False): + + if coverage == False: + + pivots_sel = [] + pivots_naive = [] + + for n in range(nsim): + + ntask = 4 + + pivot_sel = test_multitask_lasso_hetero(ntask=ntask, + nsamples=1000 * np.ones(ntask), + p=100, + global_sparsity=0.95, + task_sparsity=0.50, + sigma=1. * np.ones(ntask), + signal_fac=np.array([0.5, 1.]), + rhos=0.70 * np.ones(ntask), + weight=2.)[2] + + pivot_naive = test_multitask_lasso_naive_hetero(ntask=ntask, + nsamples=1000 * np.ones(ntask), + p=100, + global_sparsity=0.95, + task_sparsity=0.50, + sigma=1. * np.ones(ntask), + signal_fac=np.array([0.5, 1.]), + rhos=0.70 * np.ones(ntask), + weight=2.)[2] + + pivots_sel.extend(pivot_sel) + pivots_naive.extend(pivot_naive) + + print("iteration completed ", n) + + plt.clf() + + ecdf_MLE = ECDF(np.asarray(pivots_sel)) + ecdf_naive = ECDF(np.asarray(pivots_naive)) + + grid = np.linspace(0, 1, 101) + plt.plot(grid, ecdf_MLE(grid), c='blue', marker='^') + plt.plot(grid, ecdf_naive(grid), c='red', marker='^') + plt.plot(grid, grid, 'k--') + plt.show() + + if coverage == True: + + covs_sel = [] + lens_sel = [] + pivots_sel = [] + + covs_naive = [] + lens_naive = [] + pivots_naive = [] + + for n in range(nsim): + ntask = 4 + + cov_sel, len_sel, pivot_sel = test_multitask_lasso_hetero(ntask=ntask, + nsamples=1000 * np.ones(ntask), + p=100, + global_sparsity=0.95, + task_sparsity=0.50, + sigma=1. * np.ones(ntask), + signal_fac=np.array([0.5, 1.]), + rhos=0.70 * np.ones(ntask), + weight=2.) + + cov_naive, len_naive, pivot_naive = test_multitask_lasso_naive_hetero(ntask=ntask, + nsamples=1000 * np.ones(ntask), + p=100, + global_sparsity=0.95, + task_sparsity=0.50, + sigma=1. * np.ones(ntask), + signal_fac=np.array([0.5, 1.]), + rhos=0.70 * np.ones(ntask), + weight=2.)[2] + + covs_sel.extend(cov_sel) + lens_sel.extend(len_sel) + pivots_sel.extend(pivot_sel) + + covs_naive.extend(cov_naive) + lens_naive.extend(len_naive) + pivots_naive.extend(pivot_naive) + + print("iteration completed ", n) + print("coverage so far ", np.mean(np.asarray(covs_sel)), np.mean(np.asarray(covs_naive))) + print("length so far ", np.mean(np.asarray(lens_sel)), np.mean(np.asarray(lens_naive))) if __name__ == "__main__":