diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b8ac4e88..1d2b7d7b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -62,9 +62,15 @@ jobs: branch=${BENCHOPT_BRANCH##*:} pip install -U git+https://github.com/$user/benchopt@$branch + - name: Disable Julia multi-threading on MacOS + if: matrix.os == 'macos-latest' + run: | + export JULIA_NUM_THREADS=1 + - name: Test run: | export OMP_NUM_THREADS=1 + export JULIA_NUM_THREADS=1 benchopt test . --env-name bench_test_env -vl benchopt test . --env-name bench_test_env -vl --skip-install diff --git a/solvers/lasso_jl.jl b/solvers/lasso_jl.jl new file mode 100644 index 00000000..715f52a7 --- /dev/null +++ b/solvers/lasso_jl.jl @@ -0,0 +1,51 @@ +using Lasso +using PyCall +using SparseArrays + +function scipyCSC_to_julia(A) + m, n = A.shape + colPtr = Int[i + 1 for i in PyArray(A."indptr")] + rowVal = Int[i + 1 for i in PyArray(A."indices")] + nzVal = Vector{Float64}(PyArray(A."data")) + B = SparseMatrixCSC{Float64,Int}(m, n, colPtr, rowVal, nzVal) + + return B +end + +function solve_lasso( + X, + y::Vector{Float64}, + lambda::Vector{Float64}, + fit_intercept::Bool, + tol::Float64, + cd_maxiter::Int, + get_null_solution::Bool, +) + p = size(X, 2) + + w = if fit_intercept zeros(Float64, p + 1) else zeros(Float64, p) end + + converged = true + + # TODO(jolars): once https://github.com/JuliaStats/Lasso.jl/issues/70 or + # maybe https://github.com/JuliaStats/Lasso.jl/issues/71 is/are resolved, + # we should not need the try-catch here + if !get_null_solution + try + lasso_fit = fit( + LassoPath, + X, + y; + λ=lambda, standardize=false, intercept=fit_intercept, + maxncoef=max(size(X, 1), size(X, 2)) * 100, + cd_maxiter=cd_maxiter, + cd_tol=tol, + ) + w = coef(lasso_fit) + catch + converged = false + end + end + + return w, converged +end diff --git a/solvers/lasso_jl.py b/solvers/lasso_jl.py new file mode 100644 index 00000000..218d176d --- /dev/null +++ b/solvers/lasso_jl.py @@ -0,0 +1,89 @@ +from benchopt import safe_import_context +from benchopt.helpers.julia import ( + JuliaSolver, + assert_julia_installed, + get_jl_interpreter, +) +from benchopt.runner import INFINITY +from benchopt.stopping_criterion import SufficientProgressCriterion + +with safe_import_context() as import_ctx: + assert_julia_installed() + import warnings + import numpy as np + from scipy import sparse + + from pathlib import Path + # File containing the function to be called from julia + JULIA_SOLVER_FILE = str(Path(__file__).with_suffix(".jl")) + + +class Solver(JuliaSolver): + name = "lasso_jl" + stopping_criterion = SufficientProgressCriterion( + patience=7, eps=1e-15, strategy="tolerance" + ) + julia_requirements = [ + "Lasso", + "PyCall", + "SparseArrays", + ] + references = [ + 'J. Friedman, T. J. Hastie and R. Tibshirani, "Regularization paths ' + 'for generalized linear models via coordinate descent", ' + "J. Stat. Softw., vol. 33, no. 1, pp. 1-22, NIH Public Access (2010)" + ] + + def set_objective(self, X, y, lmbd, fit_intercept): + self.n, self.p = X.shape + self.X = X + self.y = y + self.lmbd = np.array([lmbd]) + self.fit_intercept = fit_intercept + + jl = get_jl_interpreter() + jl.include(str(JULIA_SOLVER_FILE)) + self.solve_lasso = jl.solve_lasso + + if sparse.issparse(X): + scipyCSC_to_julia = jl.pyfunctionret( + jl.scipyCSC_to_julia, jl.Any, jl.PyObject + ) + self.X = scipyCSC_to_julia(X) + + # Trigger Julia JIT compilation + w_dim = self.p + 1 if fit_intercept else self.p + self.prev_solution = np.zeros(w_dim) + + warnings.filterwarnings("ignore", category=FutureWarning) + self.run(1e-2) + + def run(self, tol): + # remove possibly spurious warnings from pyjulia + # TODO: remove filter when + # https://github.com/JuliaPy/pyjulia/issues/497 is resolved or + # otherwise fix the warning + warnings.filterwarnings("ignore", category=FutureWarning) + + coefs, converged = self.solve_lasso( + self.X, + self.y, + self.lmbd / len(self.y), + self.fit_intercept, + tol**1.8, + 1_000_000, + tol == INFINITY, + ) + if converged: + self.coefs = coefs + self.prev_solution = coefs + else: + self.coefs = self.prev_solution + + def get_result(self): + coefs = np.ravel(self.coefs) + + if self.fit_intercept: + coefs = np.hstack((coefs[1:], coefs[0])) + + return coefs diff --git a/test_config.py b/test_config.py index d320944e..297b8aa2 100644 --- a/test_config.py +++ b/test_config.py @@ -15,6 +15,10 @@ def check_test_solver_install(solver_class): if 'julia' in solver_class.name.lower(): pytest.xfail('Julia install from conda fails currently.') + # Julia segfauls on OSX + if solver_class.name.lower() == 'lasso_jl' and sys.platform == 'darwin': + pytest.xfail('Julia solvers segfauls on OSX.') + # ModOpt install change numpy version, breaking celer install. # See CEA-COSMIC/ModOpt#144. Skipping for now if ('modopt' in solver_class.name.lower()):