diff --git a/solvers/blitz.py b/solvers/blitz.py index 09dfdec..683bab5 100644 --- a/solvers/blitz.py +++ b/solvers/blitz.py @@ -4,6 +4,7 @@ with safe_import_context() as import_ctx: import blitzl1 + import numpy as np class Solver(BaseSolver): @@ -20,16 +21,11 @@ class Solver(BaseSolver): 'vol. 37, pp. 1171-1179 (2015)' ] - def skip(self, X, y, lmbd, fit_intercept): - if fit_intercept: - return True, f"{self.name} does not handle fit_intercept" - - return False, None - def set_objective(self, X, y, lmbd, fit_intercept): self.X, self.y, self.lmbd = X, y, lmbd + self.fit_intercept = fit_intercept - blitzl1.set_use_intercept(False) + blitzl1.set_use_intercept(self.fit_intercept) blitzl1.set_tolerance(0) self.problem = blitzl1.LassoProblem(self.X, self.y) @@ -39,7 +35,10 @@ def get_next(previous): return previous + 1 def run(self, n_iter): - self.coef_ = self.problem.solve(self.lmbd, max_iter=n_iter).x + self.sol_ = self.problem.solve(self.lmbd, max_iter=n_iter) def get_result(self): - return self.coef_.flatten() + if self.fit_intercept: + return np.r_[self.sol_.x, self.sol_.intercept] + else: + return self.sol_.x.flatten() diff --git a/solvers/cd.py b/solvers/cd.py index 289a217..6abc46a 100644 --- a/solvers/cd.py +++ b/solvers/cd.py @@ -37,14 +37,24 @@ class Solver(BaseSolver): ] def skip(self, X, y, lmbd, fit_intercept): - # XXX - not implemented but this should be quite easy - if fit_intercept: - return True, f"{self.name} does not handle fit_intercept" + # XXX - intercept not implemented for sparse X but it shouldn't be hard + if fit_intercept and sparse.issparse(X): + return ( + True, + f"{self.name} doesn't handle fit_intercept with sparse data", + ) return False, None def set_objective(self, X, y, lmbd, fit_intercept): - self.y, self.lmbd = y, lmbd + # Handling intercept: center y and X (dense data only) + if fit_intercept and not sparse.issparse(self.X): + self.X_offset = np.average(X, axis=0) + X -= self.X_offset + self.y_offset = np.average(y, axis=0) + y -= self.y_offset + + self.y, self.lmbd, self.fit_intercept = y, lmbd, fit_intercept if sparse.issparse(X): self.X = X @@ -66,6 +76,10 @@ def run(self, n_iter): L = (self.X ** 2).sum(axis=0) self.w = self.cd(self.X, self.y, self.lmbd, L, n_iter) + if self.fit_intercept and not sparse.issparse(self.X): + intercept = self.y_offset - self.X_offset @ self.w + self.w = np.r_[self.w, intercept] + @staticmethod @njit def cd(X, y, lmbd, L, n_iter): diff --git a/solvers/julia_pgd.py b/solvers/julia_pgd.py index 999a419..8d64de2 100644 --- a/solvers/julia_pgd.py +++ b/solvers/julia_pgd.py @@ -6,6 +6,8 @@ from benchopt.helpers.julia import assert_julia_installed with safe_import_context() as import_ctx: + import numpy as np + from scipy.sparse import issparse assert_julia_installed() @@ -27,15 +29,26 @@ class Solver(JuliaSolver): 'algorithm for linear inverse problems", SIAM J. Imaging Sci., ' 'vol. 2, no. 1, pp. 183-202 (2009)' ] + support_sparse = False def skip(self, X, y, lmbd, fit_intercept): - # XXX - fit intercept is not yet implemented in julia.jl - if fit_intercept: - return True, f"{self.name} does not handle fit_intercept" + # XXX - fit intercept is not yet implemented in julia.jl for sparse X + if fit_intercept and issparse(X): + return ( + True, + f"{self.name} doesn't handle fit_intercept with sparse data" + ) return False, None def set_objective(self, X, y, lmbd, fit_intercept): + # Handling intercept: center y and X (dense data only) + if fit_intercept and not issparse(self.X): + self.X_offset = np.average(X, axis=0) + X -= self.X_offset + self.y_offset = np.average(y, axis=0) + y -= self.y_offset + self.X, self.y, self.lmbd = X, y, lmbd self.fit_intercept = fit_intercept @@ -45,5 +58,9 @@ def set_objective(self, X, y, lmbd, fit_intercept): def run(self, n_iter): self.beta = self.solve_lasso(self.X, self.y, self.lmbd, n_iter) + if self.fit_intercept and not issparse(self.X): + intercept = self.y_offset - self.X_offset @ self.beta + self.beta = np.r_[self.beta.ravel(), intercept] + def get_result(self): return self.beta.ravel() diff --git a/solvers/l_bfgs_b.py b/solvers/l_bfgs_b.py index 2410c35..498bd5a 100644 --- a/solvers/l_bfgs_b.py +++ b/solvers/l_bfgs_b.py @@ -5,6 +5,7 @@ import numpy as np from numpy.linalg import norm from scipy.optimize import fmin_l_bfgs_b + from scipy.sparse import issparse class Solver(BaseSolver): @@ -28,13 +29,23 @@ class Solver(BaseSolver): ] def skip(self, X, y, lmbd, fit_intercept): - # XXX - not implemented but this should be quite easy - if fit_intercept: - return True, f"{self.name} does not handle fit_intercept" + # XXX - intercept not implemented for sparse X for now + if fit_intercept and issparse(X): + return ( + True, + f"{self.name} doesn't handle fit_intercept with sparse data" + ) return False, None def set_objective(self, X, y, lmbd, fit_intercept): + # Handling intercept: center y and X (dense data only) + if fit_intercept and not issparse(self.X): + self.X_offset = np.average(X, axis=0) + X -= self.X_offset + self.y_offset = np.average(y, axis=0) + y -= self.y_offset + self.X, self.y, self.lmbd = X, y, lmbd self.fit_intercept = fit_intercept @@ -65,5 +76,9 @@ def gradf(w): self.w = w_hat + if self.fit_intercept and not issparse(self.X): + intercept = self.y_offset - self.X_offset @ self.w + self.w = np.r_[self.w, intercept] + def get_result(self): return self.w.flatten() diff --git a/solvers/lightning.py b/solvers/lightning.py index bf7be8e..e8f1bf4 100644 --- a/solvers/lightning.py +++ b/solvers/lightning.py @@ -4,11 +4,10 @@ with safe_import_context() as import_ctx: import numpy as np + from scipy import sparse from lightning.regression import CDRegressor -# TODO: lightning always fit an intercept -# it is thus not optimizing the same cost function class Solver(BaseSolver): name = 'Lightning' @@ -25,12 +24,24 @@ class Solver(BaseSolver): ] def skip(self, X, y, lmbd, fit_intercept): - if fit_intercept: - return True, f"{self.name} does not handle fit_intercept" + if fit_intercept and sparse.issparse(X): + return ( + True, + f"{self.name} doesn't handle fit_intercept with sparse data", + ) return False, None def set_objective(self, X, y, lmbd, fit_intercept): + # lightning has an attribut intercept_ but it is not handled properly + # (as it is simply set to zero). For this reason, we handle intercept + # manually: center y and X beforehand (for dense data only) + if fit_intercept and not sparse.issparse(self.X): + self.X_offset = np.average(X, axis=0) + X -= self.X_offset + self.y_offset = np.average(y, axis=0) + y -= self.y_offset + self.X, self.y, self.lmbd = X, y, lmbd self.fit_intercept = fit_intercept @@ -45,6 +56,8 @@ def run(self, n_iter): def get_result(self): beta = self.clf.coef_.flatten() + if self.fit_intercept: - beta = np.r_[beta, self.clf.intercept_] + intercept = self.y_offset - self.X_offset @ beta + beta = np.r_[beta, intercept] return beta diff --git a/solvers/noncvx_pro.py b/solvers/noncvx_pro.py index 8317a48..54992d4 100644 --- a/solvers/noncvx_pro.py +++ b/solvers/noncvx_pro.py @@ -21,15 +21,33 @@ class Solver(BaseSolver): ] def set_objective(self, X, y, lmbd, fit_intercept): + # Handling intercept: center y and X (dense data only) + if fit_intercept and not issparse(self.X): + self.X_offset = np.average(X, axis=0) + X -= self.X_offset + self.y_offset = np.average(y, axis=0) + y -= self.y_offset + self.X, self.y, self.lmbd = X, y, lmbd + self.fit_intercept = fit_intercept + if X.shape[0] >= X.shape[1]: self.C = X.T @ X if issparse(self.C): self.C = self.C.toarray() def skip(self, X, y, lmbd, fit_intercept): - if fit_intercept: - return True, f"{self.name} does not handle fit_intercept" + # XXX: make this solver work with sparse matrices. + if issparse(X): + return True, f"{self.name} does not support sparse design matrices" + # XXX: even if sparse support is added, the test below should be kept + # unless fit_intercept is properly handled for sparse matrices + # (by manually considering X_offset in calculations) + if fit_intercept and issparse(X): + return ( + True, + f"{self.name} doesn't handle fit_intercept with sparse data" + ) return False, None def run(self, n_iter): @@ -80,5 +98,9 @@ def nabla_f(v): v = lbfgs_res.x self.w = v * u_opt(v) + if self.fit_intercept and not issparse(self.X): + intercept = self.y_offset - self.X_offset @ self.w + self.w = np.r_[self.w, intercept] + def get_result(self): return self.w.flatten() diff --git a/solvers/python_pgd.py b/solvers/python_pgd.py index 81f600b..f7bff70 100644 --- a/solvers/python_pgd.py +++ b/solvers/python_pgd.py @@ -22,13 +22,23 @@ class Solver(BaseSolver): ] def skip(self, X, y, lmbd, fit_intercept): - # XXX - not implemented but not too complicated to implement - if fit_intercept: - return True, f"{self.name} does not handle fit_intercept" + # XXX - intercept not implemented for sparse X but it shouldn't be hard + if fit_intercept and sparse.issparse(X): + return ( + True, + f"{self.name} doesn't handle fit_intercept with sparse data", + ) return False, None def set_objective(self, X, y, lmbd, fit_intercept): + # Handling intercept: center y and X (dense data only) + if fit_intercept and not sparse.issparse(self.X): + self.X_offset = np.average(X, axis=0) + X -= self.X_offset + self.y_offset = np.average(y, axis=0) + y -= self.y_offset + self.X, self.y, self.lmbd = X, y, lmbd self.fit_intercept = fit_intercept @@ -40,8 +50,10 @@ def run(self, callback): if self.use_acceleration: z = np.zeros(n_features) + intercept = self.y_offset if self.fit_intercept else [] + t_new = 1 - while callback(w): + while callback(np.r_[w, intercept]): if self.use_acceleration: t_old = t_new t_new = (1 + np.sqrt(1 + 4 * t_old ** 2)) / 2 @@ -53,7 +65,10 @@ def run(self, callback): w -= self.X.T @ (self.X @ w - self.y) / L w = self.st(w, self.lmbd / L) - self.w = w + if self.fit_intercept: + intercept = self.y_offset - self.X_offset @ w + + self.w = np.r_[w, intercept] def st(self, w, mu): w -= np.clip(w, -mu, mu) diff --git a/solvers/r_pgd.py b/solvers/r_pgd.py index 54c6542..29031b8 100644 --- a/solvers/r_pgd.py +++ b/solvers/r_pgd.py @@ -5,6 +5,7 @@ with safe_import_context() as import_ctx: import numpy as np + from scipy.sparse import issparse from rpy2 import robjects from rpy2.robjects import numpy2ri @@ -34,12 +35,23 @@ class Solver(BaseSolver): ] def skip(self, X, y, lmbd, fit_intercept): - if fit_intercept: - return True, f"{self.name} does not handle fit_intercept" + # rpy2 does not directly support sparse matrices (workaround exists) + if fit_intercept and issparse(X): + return ( + True, + f"{self.name} doesn't handle fit_intercept with sparse data" + ) return False, None def set_objective(self, X, y, lmbd, fit_intercept): + # Handling intercept: center y and X (dense data only) + if fit_intercept and not issparse(self.X): + self.X_offset = np.average(X, axis=0) + X -= self.X_offset + self.y_offset = np.average(y, axis=0) + y -= self.y_offset + self.X, self.y, self.lmbd = X, y, lmbd self.fit_intercept = fit_intercept self.r_pgd = robjects.r['proximal_gradient_descent'] @@ -52,4 +64,8 @@ def run(self, n_iter): self.w = np.array(as_r(coefs, "vector")) def get_result(self): - return self.w.flatten() + if self.fit_intercept: + intercept = self.y_offset - self.X_offset @ self.w + return np.r_[self.w.flatten(), intercept] + else: + return self.w.flatten() diff --git a/solvers/skglm.py b/solvers/skglm.py index 4728297..073b43b 100644 --- a/solvers/skglm.py +++ b/solvers/skglm.py @@ -4,6 +4,7 @@ with safe_import_context() as import_ctx: import warnings import numpy as np + from scipy import sparse from skglm import Lasso from sklearn.exceptions import ConvergenceWarning @@ -14,7 +15,7 @@ class Solver(BaseSolver): install_cmd = 'conda' requirements = [ - 'pip:skglm' + 'pip:skglm', 'scikit-learn' ] references = [ 'Q. Bertrand and Q. Klopfenstein and P.-A. Bannier and G. Gidel' @@ -24,8 +25,11 @@ class Solver(BaseSolver): ] def skip(self, X, y, lmbd, fit_intercept): - if fit_intercept: - return True, f"{self.name} does not handle fit_intercept" + if fit_intercept and sparse.issparse(X): + return ( + True, + f"{self.name} doesn't handle fit_intercept with sparse data", + ) return False, None @@ -37,7 +41,8 @@ def set_objective(self, X, y, lmbd, fit_intercept): n_samples = self.X.shape[0] self.lasso = Lasso( alpha=self.lmbd / n_samples, max_iter=1, max_epochs=50_000, - tol=1e-12, fit_intercept=False, warm_start=False, verbose=False) + tol=1e-12, fit_intercept=self.fit_intercept, warm_start=False, + verbose=False) # Cache Numba compilation self.run(1)