From 3232c370b4ff8a9c6f607f00c829ff9002a9c287 Mon Sep 17 00:00:00 2001 From: Johan Larsson Date: Tue, 30 Nov 2021 16:17:03 +0100 Subject: [PATCH 01/19] Fix bug in glmnet solver --- solvers/glmnet.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/solvers/glmnet.py b/solvers/glmnet.py index 7431a67c..17b95a65 100644 --- a/solvers/glmnet.py +++ b/solvers/glmnet.py @@ -42,14 +42,15 @@ def set_objective(self, X, y, lmbd, fit_intercept): self.glmnet = robjects.r['glmnet'] def run(self, n_iter): - fit_dict = {"lambda.min.ratio": self.lmbd / self.lmbd_max} + fit_dict = {"lambda": self.lmbd} + glmnet_fit = self.glmnet(self.X, self.y, intercept=False, standardize=False, maxit=n_iter, thresh=1e-14, **fit_dict) results = dict(zip(glmnet_fit.names, list(glmnet_fit))) as_matrix = robjects.r['as'] coefs = np.array(as_matrix(results["beta"], "matrix")) - self.w = coefs[:, -1] + self.w = coefs.flatten() def get_result(self): return self.w From df67ce628b68db36e321a8c626ade614121b583a Mon Sep 17 00:00:00 2001 From: mathurinm Date: Wed, 1 Dec 2021 11:06:54 +0100 Subject: [PATCH 02/19] add debug script --- test_glmnet.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 test_glmnet.py diff --git a/test_glmnet.py b/test_glmnet.py new file mode 100644 index 00000000..6878c354 --- /dev/null +++ b/test_glmnet.py @@ -0,0 +1,36 @@ +import numpy as np +from rpy2 import robjects +from rpy2.robjects import numpy2ri +from rpy2.robjects.packages import importr +import celer +from numpy.linalg import norm + +numpy2ri.activate() +glmnet = importr('glmnet') +as_matrix = robjects.r['as'] + +n = 10 +p = 30 + +np.random.seed(1532) + +y = np.random.randn(n) +X = np.random.randn(n, p) + +lmbd_max = np.max(np.abs(X.T @ y)) / n + +for ratio in [0.1, 1e-3]: + lmbd = ratio * lmbd_max + + fit_dict2 = {"lambda": lmbd} + fit2 = glmnet.glmnet(X, y, intercept=False, + standardize=False, **fit_dict2) + results2 = dict(zip(fit2.names, list(fit2))) + + beta = np.squeeze(np.array(as_matrix(results2["beta"], "matrix"))) + + clf = celer.Lasso(fit_intercept=False, alpha=lmbd, tol=1e-6).fit(X, y) + print(f"lambda = {ratio}*lambda_max") + print(beta) + print(clf.coef_) + print(f"norm of difference: {norm(beta - clf.coef_)}") From 6aa76de981576ca295d703e48257d63a08e561f0 Mon Sep 17 00:00:00 2001 From: mathurinm Date: Wed, 1 Dec 2021 11:14:12 +0100 Subject: [PATCH 03/19] add computation of objective functions --- test_glmnet.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test_glmnet.py b/test_glmnet.py index 6878c354..5e4b191f 100644 --- a/test_glmnet.py +++ b/test_glmnet.py @@ -19,6 +19,12 @@ lmbd_max = np.max(np.abs(X.T @ y)) / n + +def p_obj(X, y, w, lmbd): + R = y - X @ w + return np.mean(R ** 2) / 2. + lmbd * norm(w, ord=1) + + for ratio in [0.1, 1e-3]: lmbd = ratio * lmbd_max @@ -34,3 +40,5 @@ print(beta) print(clf.coef_) print(f"norm of difference: {norm(beta - clf.coef_)}") + print(f"celer obj: {p_obj(X, y, clf.coef_, lmbd)}") + print(f"GLMnet obj: {p_obj(X, y, beta, lmbd)}") From 47ed26502c8bf0fb2ddc7883b87dbbaee215d26e Mon Sep 17 00:00:00 2001 From: mathurinm Date: Wed, 1 Dec 2021 11:16:35 +0100 Subject: [PATCH 04/19] test that for lambda just below lambda max both solutions are non zero --- test_glmnet.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test_glmnet.py b/test_glmnet.py index 5e4b191f..efbcdc95 100644 --- a/test_glmnet.py +++ b/test_glmnet.py @@ -25,7 +25,8 @@ def p_obj(X, y, w, lmbd): return np.mean(R ** 2) / 2. + lmbd * norm(w, ord=1) -for ratio in [0.1, 1e-3]: +for ratio in [1, 0.95, 1e-3]: + print("#" * 80) lmbd = ratio * lmbd_max fit_dict2 = {"lambda": lmbd} From 4d2464802a5c10aaa04d563f9cd2cab536d71906 Mon Sep 17 00:00:00 2001 From: Johan Larsson Date: Wed, 1 Dec 2021 11:26:54 +0100 Subject: [PATCH 05/19] Drop unnecessary computation of lambda_max --- solvers/glmnet.py | 1 - 1 file changed, 1 deletion(-) diff --git a/solvers/glmnet.py b/solvers/glmnet.py index 17b95a65..d790d7e0 100644 --- a/solvers/glmnet.py +++ b/solvers/glmnet.py @@ -38,7 +38,6 @@ def set_objective(self, X, y, lmbd, fit_intercept): self.X, self.y, self.lmbd = X, y, lmbd self.fit_intercept = fit_intercept - self.lmbd_max = np.max(np.abs(X.T @ y)) self.glmnet = robjects.r['glmnet'] def run(self, n_iter): From fed8fa451c19873d5db0875444e48b1c7079fcb8 Mon Sep 17 00:00:00 2001 From: Johan Larsson Date: Wed, 1 Dec 2021 11:27:36 +0100 Subject: [PATCH 06/19] Harmonize lambda scaling for glmnet solver --- solvers/glmnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solvers/glmnet.py b/solvers/glmnet.py index d790d7e0..5cdb5326 100644 --- a/solvers/glmnet.py +++ b/solvers/glmnet.py @@ -41,7 +41,7 @@ def set_objective(self, X, y, lmbd, fit_intercept): self.glmnet = robjects.r['glmnet'] def run(self, n_iter): - fit_dict = {"lambda": self.lmbd} + fit_dict = {"lambda": self.lmbd / self.X.shape[0]} glmnet_fit = self.glmnet(self.X, self.y, intercept=False, standardize=False, maxit=n_iter, From 790607a13dd53ade500d0ce74f509e2bd7501899 Mon Sep 17 00:00:00 2001 From: Johan Larsson Date: Mon, 6 Dec 2021 13:51:33 +0100 Subject: [PATCH 07/19] Switch benchopt method to use tolerance instead --- solvers/glmnet.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/solvers/glmnet.py b/solvers/glmnet.py index 5cdb5326..4d95fec4 100644 --- a/solvers/glmnet.py +++ b/solvers/glmnet.py @@ -24,7 +24,7 @@ class Solver(BaseSolver): 'for generalized linear models via coordinate descent", ' 'J. Stat. Softw., vol. 33, no. 1, pp. 1-22, NIH Public Access (2010)' ] - stop_strategy = 'iteration' + stop_strategy = 'tolerance' support_sparse = False def skip(self, X, y, lmbd, fit_intercept): @@ -40,12 +40,14 @@ def set_objective(self, X, y, lmbd, fit_intercept): self.glmnet = robjects.r['glmnet'] - def run(self, n_iter): - fit_dict = {"lambda": self.lmbd / self.X.shape[0]} + def run(self, tol): + fit_dict = {"lambda": self.lmbd / len(self.y)} + + print(tol) glmnet_fit = self.glmnet(self.X, self.y, intercept=False, - standardize=False, maxit=n_iter, - thresh=1e-14, **fit_dict) + standardize=False, maxit=1_000_000, + thresh=tol / len(self.y), **fit_dict) results = dict(zip(glmnet_fit.names, list(glmnet_fit))) as_matrix = robjects.r['as'] coefs = np.array(as_matrix(results["beta"], "matrix")) From 8f947d8710fc1360404bdd62a45dd836b7cc41f4 Mon Sep 17 00:00:00 2001 From: mathurinm Date: Mon, 6 Dec 2021 15:38:12 +0100 Subject: [PATCH 08/19] tweaks to test_glmnet --- test_glmnet.py | 89 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 65 insertions(+), 24 deletions(-) diff --git a/test_glmnet.py b/test_glmnet.py index efbcdc95..366ef66d 100644 --- a/test_glmnet.py +++ b/test_glmnet.py @@ -1,45 +1,86 @@ +import sys +from timeit import default_timer as timer + +import benchopt +from benchopt.datasets import make_correlated_data import numpy as np +from numpy.linalg import norm from rpy2 import robjects from rpy2.robjects import numpy2ri from rpy2.robjects.packages import importr -import celer -from numpy.linalg import norm +import matplotlib.pyplot as plt +from celer import Lasso +import time +# Setup the system to allow rpy2 running numpy2ri.activate() glmnet = importr('glmnet') + as_matrix = robjects.r['as'] -n = 10 -p = 30 +n = 100 +p = 500 np.random.seed(1532) y = np.random.randn(n) -X = np.random.randn(n, p) +x, y, _ = make_correlated_data(n, p, random_state=0) + +lmbd_max = np.max(np.abs(x.T @ y)) + +lmbd = lmbd_max * 0.01 + +fit_dict = {"lambda": lmbd / n} + +tols = np.geomspace(1e-1, 1e-16, 11) + + +def compute_gap(x, y, beta, lmbd): + # compute residuals + diff = y - x.dot(beta) + + # compute primal objective and duality gap + p_obj = .5 * diff.dot(diff) + lmbd * np.abs(beta).sum() + theta = diff / lmbd + theta /= norm(x.T @ theta, ord=np.inf) + d_obj = (norm(y)**2 / 2. - lmbd**2 * norm(y / lmbd - theta)**2 / 2) + + return p_obj - d_obj + + +n_tols = len(tols) + +times = np.empty(n_tols) +gaps = np.empty(n_tols) -lmbd_max = np.max(np.abs(X.T @ y)) / n +for i, tol in enumerate(tols): + print(i, tol) + t0 = timer() + fit = glmnet.glmnet(x, + y, + intercept=False, + standardize=False, + **fit_dict, + thresh=tol, + maxit=10_000_000) -def p_obj(X, y, w, lmbd): - R = y - X @ w - return np.mean(R ** 2) / 2. + lmbd * norm(w, ord=1) + times[i] = timer() - t0 + results = dict(zip(fit.names, list(fit))) + coefs = np.array(as_matrix(results["beta"], "matrix")) + beta = coefs.flatten() -for ratio in [1, 0.95, 1e-3]: - print("#" * 80) - lmbd = ratio * lmbd_max + gaps[i] = compute_gap(x, y, beta, lmbd) - fit_dict2 = {"lambda": lmbd} - fit2 = glmnet.glmnet(X, y, intercept=False, - standardize=False, **fit_dict2) - results2 = dict(zip(fit2.names, list(fit2))) +plt.close('all') +plt.semilogy(times, gaps) +plt.xlabel("time (s)") +plt.ylabel("duality gap") +plt.show(block=False) - beta = np.squeeze(np.array(as_matrix(results2["beta"], "matrix"))) - clf = celer.Lasso(fit_intercept=False, alpha=lmbd, tol=1e-6).fit(X, y) - print(f"lambda = {ratio}*lambda_max") - print(beta) - print(clf.coef_) - print(f"norm of difference: {norm(beta - clf.coef_)}") - print(f"celer obj: {p_obj(X, y, clf.coef_, lmbd)}") - print(f"GLMnet obj: {p_obj(X, y, beta, lmbd)}") +t0 = time.time() +clf = Lasso(alpha=lmbd/n, fit_intercept=False, tol=1e-14, verbose=1).fit(x, y) +t1 = time.time() +print(t1 - t0) From 1be208a9b3cb7f9170bc3b6ece0f83ec19efeeef Mon Sep 17 00:00:00 2001 From: mathurinm Date: Mon, 6 Dec 2021 17:37:00 +0100 Subject: [PATCH 09/19] change glmnet criterion to use a higher patience --- solvers/glmnet.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/solvers/glmnet.py b/solvers/glmnet.py index 4d95fec4..9e1256d2 100644 --- a/solvers/glmnet.py +++ b/solvers/glmnet.py @@ -1,5 +1,6 @@ from benchopt import BaseSolver from benchopt import safe_import_context +from benchopt.stopping_criterion import SufficientProgressCriterion with safe_import_context() as import_ctx: @@ -24,9 +25,11 @@ class Solver(BaseSolver): 'for generalized linear models via coordinate descent", ' 'J. Stat. Softw., vol. 33, no. 1, pp. 1-22, NIH Public Access (2010)' ] - stop_strategy = 'tolerance' support_sparse = False + stopping_criterion = SufficientProgressCriterion( + patience=5, eps=1e-38, strategy='tolerance') + def skip(self, X, y, lmbd, fit_intercept): # XXX - glmnet support intercept, adapt the API if fit_intercept: @@ -43,8 +46,6 @@ def set_objective(self, X, y, lmbd, fit_intercept): def run(self, tol): fit_dict = {"lambda": self.lmbd / len(self.y)} - print(tol) - glmnet_fit = self.glmnet(self.X, self.y, intercept=False, standardize=False, maxit=1_000_000, thresh=tol / len(self.y), **fit_dict) From 46f6f0436c3da558405ed7184505f387d14d41b3 Mon Sep 17 00:00:00 2001 From: mathurinm Date: Tue, 7 Dec 2021 14:01:53 +0100 Subject: [PATCH 10/19] faster tol decrease for glmnet, try to have common initial point --- solvers/glmnet.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/solvers/glmnet.py b/solvers/glmnet.py index 9e1256d2..990dc5a7 100644 --- a/solvers/glmnet.py +++ b/solvers/glmnet.py @@ -1,5 +1,5 @@ -from benchopt import BaseSolver -from benchopt import safe_import_context +from benchopt import BaseSolver, safe_import_context +from benchopt.runner import INFINITY from benchopt.stopping_criterion import SufficientProgressCriterion @@ -44,11 +44,12 @@ def set_objective(self, X, y, lmbd, fit_intercept): self.glmnet = robjects.r['glmnet'] def run(self, tol): + maxit = 0 if tol == INFINITY else 1_000_000 fit_dict = {"lambda": self.lmbd / len(self.y)} glmnet_fit = self.glmnet(self.X, self.y, intercept=False, - standardize=False, maxit=1_000_000, - thresh=tol / len(self.y), **fit_dict) + standardize=False, maxit=maxit, + thresh=tol ** 2.3, **fit_dict) results = dict(zip(glmnet_fit.names, list(glmnet_fit))) as_matrix = robjects.r['as'] coefs = np.array(as_matrix(results["beta"], "matrix")) From 68a379cab9666a811371c331b2bdb41dd0e35118 Mon Sep 17 00:00:00 2001 From: mathurinm Date: Tue, 7 Dec 2021 14:40:50 +0100 Subject: [PATCH 11/19] increase patience again --- solvers/glmnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solvers/glmnet.py b/solvers/glmnet.py index 990dc5a7..95d427d8 100644 --- a/solvers/glmnet.py +++ b/solvers/glmnet.py @@ -28,7 +28,7 @@ class Solver(BaseSolver): support_sparse = False stopping_criterion = SufficientProgressCriterion( - patience=5, eps=1e-38, strategy='tolerance') + patience=7, eps=1e-38, strategy='tolerance') def skip(self, X, y, lmbd, fit_intercept): # XXX - glmnet support intercept, adapt the API From c57994db373fdf8f9d1a98963a79a3c50b96c681 Mon Sep 17 00:00:00 2001 From: mathurinm Date: Wed, 8 Dec 2021 09:33:45 +0100 Subject: [PATCH 12/19] get common starting point, comment code --- solvers/glmnet.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/solvers/glmnet.py b/solvers/glmnet.py index 95d427d8..8b058446 100644 --- a/solvers/glmnet.py +++ b/solvers/glmnet.py @@ -44,12 +44,24 @@ def set_objective(self, X, y, lmbd, fit_intercept): self.glmnet = robjects.r['glmnet'] def run(self, tol): - maxit = 0 if tol == INFINITY else 1_000_000 + # even if maxit=0, glmnet can return non zero coefficients. To get the + # initial point on the curve, we set tol=0: this way, glmnet + # convergence check fails, and rather than returning the current + # iterate, it returns a 0 model. + if tol == INFINITY: + maxit = 0 + thresh = 0 + else: + maxit = 1_000_000 + # we need thresh to decay fast, otherwise the objective curve can + # plateau before convergence + thresh = tol ** 1.8 + fit_dict = {"lambda": self.lmbd / len(self.y)} glmnet_fit = self.glmnet(self.X, self.y, intercept=False, standardize=False, maxit=maxit, - thresh=tol ** 2.3, **fit_dict) + thresh=thresh, **fit_dict) results = dict(zip(glmnet_fit.names, list(glmnet_fit))) as_matrix = robjects.r['as'] coefs = np.array(as_matrix(results["beta"], "matrix")) From 43723de693f26cac7d244d44bc7a88b0aa72fd8d Mon Sep 17 00:00:00 2001 From: mathurinm Date: Wed, 8 Dec 2021 12:42:27 +0100 Subject: [PATCH 13/19] CLN remove test_glmnet.py --- test_glmnet.py | 86 -------------------------------------------------- 1 file changed, 86 deletions(-) delete mode 100644 test_glmnet.py diff --git a/test_glmnet.py b/test_glmnet.py deleted file mode 100644 index 366ef66d..00000000 --- a/test_glmnet.py +++ /dev/null @@ -1,86 +0,0 @@ -import sys -from timeit import default_timer as timer - -import benchopt -from benchopt.datasets import make_correlated_data -import numpy as np -from numpy.linalg import norm -from rpy2 import robjects -from rpy2.robjects import numpy2ri -from rpy2.robjects.packages import importr -import matplotlib.pyplot as plt -from celer import Lasso -import time - -# Setup the system to allow rpy2 running -numpy2ri.activate() -glmnet = importr('glmnet') - -as_matrix = robjects.r['as'] - -n = 100 -p = 500 - -np.random.seed(1532) - -y = np.random.randn(n) -x, y, _ = make_correlated_data(n, p, random_state=0) - -lmbd_max = np.max(np.abs(x.T @ y)) - -lmbd = lmbd_max * 0.01 - -fit_dict = {"lambda": lmbd / n} - -tols = np.geomspace(1e-1, 1e-16, 11) - - -def compute_gap(x, y, beta, lmbd): - # compute residuals - diff = y - x.dot(beta) - - # compute primal objective and duality gap - p_obj = .5 * diff.dot(diff) + lmbd * np.abs(beta).sum() - theta = diff / lmbd - theta /= norm(x.T @ theta, ord=np.inf) - d_obj = (norm(y)**2 / 2. - lmbd**2 * norm(y / lmbd - theta)**2 / 2) - - return p_obj - d_obj - - -n_tols = len(tols) - -times = np.empty(n_tols) -gaps = np.empty(n_tols) - -for i, tol in enumerate(tols): - print(i, tol) - t0 = timer() - - fit = glmnet.glmnet(x, - y, - intercept=False, - standardize=False, - **fit_dict, - thresh=tol, - maxit=10_000_000) - - times[i] = timer() - t0 - - results = dict(zip(fit.names, list(fit))) - coefs = np.array(as_matrix(results["beta"], "matrix")) - beta = coefs.flatten() - - gaps[i] = compute_gap(x, y, beta, lmbd) - -plt.close('all') -plt.semilogy(times, gaps) -plt.xlabel("time (s)") -plt.ylabel("duality gap") -plt.show(block=False) - - -t0 = time.time() -clf = Lasso(alpha=lmbd/n, fit_intercept=False, tol=1e-14, verbose=1).fit(x, y) -t1 = time.time() -print(t1 - t0) From 5fa72e90210697b7ced0a0d2b7559868f35db3dd Mon Sep 17 00:00:00 2001 From: mathurinm Date: Wed, 8 Dec 2021 13:32:55 +0100 Subject: [PATCH 14/19] more comments on glmnet behavior --- solvers/glmnet.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/solvers/glmnet.py b/solvers/glmnet.py index 8b058446..6fd24038 100644 --- a/solvers/glmnet.py +++ b/solvers/glmnet.py @@ -29,6 +29,8 @@ class Solver(BaseSolver): stopping_criterion = SufficientProgressCriterion( patience=7, eps=1e-38, strategy='tolerance') + # We use the tolerance strategy because if maxit is too low and glmnet + # convergence check fails, it returns an empty model def skip(self, X, y, lmbd, fit_intercept): # XXX - glmnet support intercept, adapt the API @@ -44,7 +46,7 @@ def set_objective(self, X, y, lmbd, fit_intercept): self.glmnet = robjects.r['glmnet'] def run(self, tol): - # even if maxit=0, glmnet can return non zero coefficients. To get the + # Even if maxit=0, glmnet can return non zero coefficients. To get the # initial point on the curve, we set tol=0: this way, glmnet # convergence check fails, and rather than returning the current # iterate, it returns a 0 model. @@ -57,6 +59,10 @@ def run(self, tol): # plateau before convergence thresh = tol ** 1.8 + # We fit with a single lambda because when computing a path, glmnet + # early stops the path based on an uncontrollable criterion. Fitting + # with a single lambda may be suboptimal if it is small, but there is + # no other way to force glmnet to solve for a prescribed lambda. fit_dict = {"lambda": self.lmbd / len(self.y)} glmnet_fit = self.glmnet(self.X, self.y, intercept=False, From 96e529203ff800a05d6963c4d84158b0d79d00fc Mon Sep 17 00:00:00 2001 From: Thomas Moreau Date: Thu, 9 Dec 2021 10:58:04 +0100 Subject: [PATCH 15/19] Update solvers/glmnet.py --- solvers/glmnet.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/solvers/glmnet.py b/solvers/glmnet.py index 6fd24038..a11383e6 100644 --- a/solvers/glmnet.py +++ b/solvers/glmnet.py @@ -27,10 +27,11 @@ class Solver(BaseSolver): ] support_sparse = False - stopping_criterion = SufficientProgressCriterion( - patience=7, eps=1e-38, strategy='tolerance') # We use the tolerance strategy because if maxit is too low and glmnet # convergence check fails, it returns an empty model + stopping_criterion = SufficientProgressCriterion( + patience=7, eps=1e-38, strategy='tolerance' + ) def skip(self, X, y, lmbd, fit_intercept): # XXX - glmnet support intercept, adapt the API From 704f1eae51b7943af0545d7b2a315c5364276c54 Mon Sep 17 00:00:00 2001 From: Johan Larsson Date: Thu, 12 May 2022 13:53:27 +0200 Subject: [PATCH 16/19] fix(ci): try to fix failing MacOS julia tests --- .github/workflows/main.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b8ac4e88..55720bb8 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -62,6 +62,11 @@ jobs: branch=${BENCHOPT_BRANCH##*:} pip install -U git+https://github.com/$user/benchopt@$branch + - name: Disable Julia multi-threading on MacOS + if: matrix.os == 'macos-latest' + run: | + export JULIA_NUM_THREADS=1 + - name: Test run: | export OMP_NUM_THREADS=1 From d4bb5a1630b5f3e70d121da73a5fe1e8856c65a4 Mon Sep 17 00:00:00 2001 From: Johan Larsson Date: Fri, 20 May 2022 10:47:00 +0200 Subject: [PATCH 17/19] ENH add support for ncvreg --- solvers/ncvreg.py | 69 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 solvers/ncvreg.py diff --git a/solvers/ncvreg.py b/solvers/ncvreg.py new file mode 100644 index 00000000..5efb93f0 --- /dev/null +++ b/solvers/ncvreg.py @@ -0,0 +1,69 @@ +from benchopt import BaseSolver, safe_import_context + +with safe_import_context() as import_ctx: + import numpy as np + from benchopt.helpers.r_lang import import_rpackages + from rpy2 import robjects + from rpy2.robjects import numpy2ri + from scipy import sparse + + # Setup the system to allow rpy2 running + numpy2ri.activate() + import_rpackages("ncvreg") + + +class Solver(BaseSolver): + name = "ncvreg" + + install_cmd = "conda" + requirements = ["r-base", "rpy2", "r-ncvreg"] + references = [ + 'P. Breheny and J. Huang, "Coordinate descent algorithms for nonconvex' + "penalized regression, with applications to biological feature" + 'selection," The Annals of Applied Statistics, vol. 5, no. 1, pp.' + "232–253, Mar. 2011, doi: 10.1214/10-AOAS388." + ] + + def set_objective(self, X, y, lmbd, fit_intercept): + self.lmbd = lmbd + self.fit_intercept = fit_intercept + self.n, self.p = X.shape + + penalty_factor = np.ones(self.p) + + # NOTE(jolars): To fit the intercept, we add a column of ones at the + # end of X and set its penalty factor to zero. + self.X = X + + if self.fit_intercept: + X = np.hstack((X, np.ones((self.n, 1)))) + penalty_factor = np.hstack((penalty_factor, np.array([0.0]))) + + self.penalty_factor = robjects.vectors.FloatVector(penalty_factor) + + self.Xmod = robjects.r.matrix(X, X.shape[0], X.shape[1]) + self.y = robjects.vectors.FloatVector(y) + + # Standardization of X cannot be turned off in standard interface to + # ncvreg, so we have to use ncvfit directly instead + self.ncvfit = robjects.r["ncvfit"] + + def skip(self, X, y, lmbd, fit_intercept): + if sparse.issparse(X): + return True, "ncvreg does not support sparse X" + + return False, None + + def run(self, n_iter): + fit_dict = { + "max.iter": n_iter, + "lambda": self.lmbd / self.n, + "penalty.factor": self.penalty_factor, + } + + self.fit = self.ncvfit(self.Xmod, self.y, penalty="lasso", eps=1e-15, **fit_dict) + + def get_result(self): + results = dict(zip(self.fit.names, list(self.fit))) + + return results["beta"] From 0f0192264b4d8058f303d56a45eb77315f74d657 Mon Sep 17 00:00:00 2001 From: Johan Larsson Date: Fri, 20 May 2022 10:52:40 +0200 Subject: [PATCH 18/19] FIX break arguments of function to appease linter --- solvers/ncvreg.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/solvers/ncvreg.py b/solvers/ncvreg.py index 5efb93f0..f4338467 100644 --- a/solvers/ncvreg.py +++ b/solvers/ncvreg.py @@ -61,7 +61,13 @@ def run(self, n_iter): "penalty.factor": self.penalty_factor, } - self.fit = self.ncvfit(self.Xmod, self.y, penalty="lasso", eps=1e-15, **fit_dict) + self.fit = self.ncvfit( + self.Xmod, + self.y, + penalty="lasso", + eps=1e-15, + **fit_dict + ) def get_result(self): results = dict(zip(self.fit.names, list(self.fit))) From 75e9b80eb3fb78e9098e4656df4e49312a809796 Mon Sep 17 00:00:00 2001 From: Johan Larsson Date: Fri, 20 May 2022 10:54:17 +0200 Subject: [PATCH 19/19] Revert "fix(ci): try to fix failing MacOS julia tests" This reverts commit 704f1eae51b7943af0545d7b2a315c5364276c54. --- .github/workflows/main.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 55720bb8..b8ac4e88 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -62,11 +62,6 @@ jobs: branch=${BENCHOPT_BRANCH##*:} pip install -U git+https://github.com/$user/benchopt@$branch - - name: Disable Julia multi-threading on MacOS - if: matrix.os == 'macos-latest' - run: | - export JULIA_NUM_THREADS=1 - - name: Test run: | export OMP_NUM_THREADS=1