diff --git a/solvers/python_pgd.py b/solvers/python_pgd.py
index 81f600b..14f2e7f 100644
--- a/solvers/python_pgd.py
+++ b/solvers/python_pgd.py
@@ -1,16 +1,30 @@
-from benchopt import BaseSolver, safe_import_context
+from benchopt import BaseSolver
+from benchopt import safe_import_context
+
 
 with safe_import_context() as import_ctx:
+    import cupy as cp
     import numpy as np
     from scipy import sparse
+    from math import sqrt
 
 
 class Solver(BaseSolver):
+
     name = 'Python-PGD'  # proximal gradient, optionally accelerated
-    stopping_strategy = "callback"
+    stopping_strategy = "iteration"
+
+    requirements = [
+        'conda-forge:cupy',
+        'conda-forge:cudatoolkit=11.5'
+    ]
 
     # any parameter defined here is accessible as a class attribute
-    parameters = {'use_acceleration': [False, True]}
+    parameters = {
+        'use_acceleration': [False, True],
+        'use_gpu': [False, True]
+    }
+
     references = [
         'I. Daubechies, M. Defrise and C. De Mol, '
         '"An iterative thresholding algorithm for linear inverse problems '
@@ -22,6 +36,9 @@ class Solver(BaseSolver):
     ]
 
     def skip(self, X, y, lmbd, fit_intercept):
+        if sparse.issparse(X) and self.use_gpu:
+            return True, "sparse is not supported with GPU"
+
         # XXX - not implemented but not too complicated to implement
         if fit_intercept:
             return True, f"{self.name} does not handle fit_intercept"
@@ -29,22 +46,28 @@ def skip(self, X, y, lmbd, fit_intercept):
         return False, None
 
     def set_objective(self, X, y, lmbd, fit_intercept):
+        if self.use_gpu:
+            # transfert X, y to GPU
+            X, y = cp.array(X), cp.array(y)
+
         self.X, self.y, self.lmbd = X, y, lmbd
         self.fit_intercept = fit_intercept
 
-    def run(self, callback):
+    def run(self, n_iter):
         L = self.compute_lipschitz_constant()
 
+        xp = cp if self.use_gpu else np
+
         n_features = self.X.shape[1]
-        w = np.zeros(n_features)
+        w = xp.zeros(n_features)
         if self.use_acceleration:
-            z = np.zeros(n_features)
+            z = xp.zeros(n_features)
 
         t_new = 1
-        while callback(w):
+        for _ in range(n_iter):
             if self.use_acceleration:
                 t_old = t_new
-                t_new = (1 + np.sqrt(1 + 4 * t_old ** 2)) / 2
+                t_new = (1 + sqrt(1 + 4 * t_old ** 2)) / 2
                 w_old = w.copy()
                 z -= self.X.T @ (self.X @ z - self.y) / L
                 w = self.st(z, self.lmbd / L)
@@ -53,10 +76,14 @@ def run(self, callback):
                 w -= self.X.T @ (self.X @ w - self.y) / L
                 w = self.st(w, self.lmbd / L)
 
+        if self.use_gpu:
+            w = cp.asnumpy(w)
+
         self.w = w
 
     def st(self, w, mu):
-        w -= np.clip(w, -mu, mu)
+        xp = cp if self.use_gpu else np
+        w -= xp.clip(w, -mu, mu)
         return w
 
     def get_result(self):
@@ -64,7 +91,7 @@ def get_result(self):
 
     def compute_lipschitz_constant(self):
         if not sparse.issparse(self.X):
-            L = np.linalg.norm(self.X, ord=2) ** 2
-        else:
-            L = sparse.linalg.svds(self.X, k=1)[1][0] ** 2
-        return L
+            xp = cp if self.use_gpu else np
+            return xp.linalg.norm(self.X, ord=2) ** 2
+
+        return sparse.linalg.svds(self.X, k=1)[1][0] ** 2