1 change: 1 addition & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -23,6 +23,7 @@ jobs:
run: |
python -m pip install -e "."
python -m pip install -e ".[test]"
python -m pip install -e ".[ray]"
- name: Lint with flake8
run: |
flake8
8 changes: 5 additions & 3 deletions demo.py
@@ -1,6 +1,6 @@
from memo import Runner
import numpy as np

import ray
from memo import memlist, memfile, grid, time_taken

data = []
@@ -24,7 +24,9 @@ def birthday_experiment(class_size, n_sim):

# To Run in parallel

data = []
ray.init(address='auto', _redis_password='5241590000000000')

settings = grid(class_size=range(20, 30), n_sim=[100, 10_000, 1_000_000], progbar=False)
runner = Runner(backend="threading", n_jobs=-1)
settings = list(grid(class_size=range(20, 30), n_sim=[100, 10_000, 1_000_000], progbar=False))
runner = Runner(backend="ray", n_jobs=-1)
runner.run(func=birthday_experiment, settings=settings)
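The diff elides the body of `birthday_experiment`, but the getting-started hunks below show its shape. As a self-contained sketch of the simulation itself, with no Ray required (the vectorized unique-count trick is an assumption, reconstructed from the `n_uniq != class_size` line visible in the docs hunk):

```python
import numpy as np

def birthday_experiment(class_size, n_sim=10_000):
    """Simulates the birthday paradox: P(at least one shared birthday)."""
    rng = np.random.default_rng(42)
    # draw `n_sim` classes of `class_size` birthdays each
    sims = rng.integers(1, 365 + 1, (n_sim, class_size))
    # count unique birthdays per simulated class by comparing sorted neighbors
    sort_sims = np.sort(sims, axis=1)
    n_uniq = (sort_sims[:, 1:] != sort_sims[:, :-1]).sum(axis=1) + 1
    # a class with fewer unique values than students has a shared birthday
    proba = np.mean(n_uniq != class_size)
    return {"est_proba": proba}

result = birthday_experiment(class_size=23)
```

In the memo version the same function carries decorators such as `@memlist(data=data)`, so the keyword inputs and the returned dict are captured together.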
54 changes: 27 additions & 27 deletions docs/getting-started.md
@@ -1,10 +1,10 @@
## Base Scenario

Let's say you're running a simulation, or maybe a machine learning experiment. Then you
might have code that looks like this;

```python
import numpy as np

def birthday_experiment(class_size, n_sim=10_000):
"""Simulates the birthday paradox. Vectorized = Fast!"""
@@ -16,18 +16,18 @@ def birthday_experiment(class_size, n_sim=10_000):
results = [birthday_experiment(s) for s in range(2, 40)]
```

This example sort of works, but how would we now go about plotting our results? If you want
to plot the effect of `class_size` and the simulated probability then it'd be do-able. But things
get tricky if you're also interested in seeing the effect of `n_sim` as well. The input of the
simulation isn't nicely captured together with the output of the simulation.

## Decorators

The idea behind this library is that you can rewrite this function, only slightly, to make
all of this data collection a whole *log* simpler.

```python
import numpy as np
from memo import memlist

data = []
@@ -52,15 +52,15 @@ via `pd.DataFrame(data)`.
## Logging More

The `memlist` decorator takes care of all data collection. It captures all keyword
arguments of the function as well as the dictionary output of the function. This
is then appended to the list `data`. Especially when you're iterating on your
experiments this might turn out to be a lovely pattern.

For example, suppose we also want to log how long the simulation takes;

```python
import time
import numpy as np
from memo import memlist

data = []
@@ -81,14 +81,14 @@ for size in range(2, 40):
birthday_experiment(class_size=size, n_sim=n_sim)
```

## Power

The real power of the library is that you can choose not only to log to
a list. You can just as easily write to a file too!

```python
import time
import numpy as np
from memo import memlist, memfile

data = []
@@ -110,15 +110,15 @@ for size in range(2, 40):
birthday_experiment(class_size=size, n_sim=n_sim)
```

## Utilities

The library also offers utilities to make the creation of these grids even easier. In particular;

- We supply a grid generation mechanism to prevent a lot of for-loops.
- We supply a `@time_taken` decorator so that you don't need to write that logic yourself.

```python
import numpy as np
from memo import memlist, memfile, grid, time_taken

data = []
@@ -138,10 +138,10 @@ for settings in grid(class_size=range(2, 40), n_sim=[1000, 10000, 100000]):
birthday_experiment(**settings)
```
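The `grid` helper used above is essentially a cartesian product over keyword arguments that yields one settings dict per combination. A minimal sketch of such a generator (a hypothetical reimplementation for illustration, not memo's actual code — memo's `grid` also accepts a `progbar=` flag, omitted here):

```python
from itertools import product

def grid(**kwargs):
    """Yield one dict of keyword settings per point in the cartesian product."""
    keys = list(kwargs)
    for values in product(*kwargs.values()):
        yield dict(zip(keys, values))

# 2 class sizes x 2 simulation counts -> 4 settings dicts
settings = list(grid(class_size=[2, 3], n_sim=[100, 200]))
```

Because each yielded dict maps parameter names to values, it can be splatted straight into the experiment as `birthday_experiment(**settings)`.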

## Parallel

If you have a lot of simulations you'd like to run, it might be helpful to
run them in parallel. That's why this library also hosts a `Runner` class
that can run your functions on multiple CPU cores.

```python
@@ -162,7 +162,7 @@ def birthday_experiment(class_size, n_sim):
proba = np.mean(n_uniq != class_size)
return {"est_proba": proba}

settings = grid(class_size=range(20, 30), n_sim=[100, 10_000, 1_000_000], progbar=False)
settings = list(grid(class_size=range(20, 30), n_sim=[100, 10_000, 1_000_000], progbar=False))

# To Run in parallel
runner = Runner(backend="threading", n_jobs=-1)
@@ -173,10 +173,10 @@ runner.run(func=birthday_experiment, settings=settings)

These decorators aren't performing magic, but my experience has been
that these decorators make it more fun to actually log the results of experiments.
It's nice to be able to just add a decorator to a function and not have to
worry about logging the statistics.

The library also offers extra features to make things a whole *log* simpler.

- `memweb` sends the json blobs to a server via http-post requests
- `memfunc` sends the data to a callable that you supply, like `print`
28 changes: 25 additions & 3 deletions memo/_runner.py
@@ -5,28 +5,43 @@
from rich.progress import Progress
import time
import warnings
from memo import NotInstalled


class Runner:
"""
Run functions in parallel with joblib.

Arguments:
backend: choice of parallism backend, can be "loky", "multiprocessing" or "threading"
backend: choice of parallelism backend, can be "loky", "multiprocessing", "threading", or "ray"
n_jobs: degree of parallelism, set to -1 to use all available cores

All keyword arguments during instantiation will pass through to `parallel_backend`.
More information on joblib can be found [here](https://joblib.readthedocs.io/en/latest/parallel.html).
Joblib can also attach to third party backends such as Ray or Apache spark,
however that functionality has not yet been tested.
Joblib can also attach to third-party backends such as [Ray](https://docs.ray.io/en/releases-1.3.0/) or Apache Spark.

Usage:


```python
from memo import Runner

runner = Runner(backend='threading', n_jobs=2)
```


With the Ray backend, first start a Ray cluster from the command line:
```shell
ray start --head --port=6379
```

```python
from memo import Runner

runner = Runner(backend='ray', n_jobs=-1)
```


"""

def __init__(
@@ -45,6 +60,13 @@ def _run(self, func: Callable, settings: Iterable[Dict]) -> None:
"""run the parallel backend
Private. All arguments passed through run method
"""
if self.backend == "ray":
try:
from ray.util.joblib import register_ray
register_ray()
except ImportError:
NotInstalled("ray", "ray")()  # instantiating alone is a no-op; calling the placeholder raises the install hint

try:
with parallel_backend(*self.args, self.backend, self.n_jobs, **self.kwargs):
Parallel(require="sharedmem")(
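The `except ImportError` branch above leans on `NotInstalled`, a common placeholder pattern for optional dependencies: the import of the wrapper always succeeds, and the user only sees an error, with an install hint, when the missing backend is actually used. A minimal sketch of the pattern (assumed behavior for illustration — memo's own class may differ in detail):

```python
class NotInstalled:
    """Placeholder for an optional dependency that is not installed."""

    def __init__(self, tool, dep):
        # message is built eagerly; nothing is raised until the object is used
        self.msg = (
            f"The '{tool}' backend requires the '{dep}' extra. "
            f"Install it with: python -m pip install 'memo[{dep}]'"
        )

    def __getattr__(self, name):
        # any attribute access on the placeholder surfaces the install hint
        raise ModuleNotFoundError(self.msg)

    def __call__(self, *args, **kwargs):
        # calling the placeholder surfaces the install hint as well
        raise ModuleNotFoundError(self.msg)
```

This is why constructing `NotInstalled("ray", "ray")` in the `except` block does nothing on its own: the error only fires when the placeholder is called or its attributes are accessed.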
3 changes: 3 additions & 0 deletions pytest.ini
@@ -0,0 +1,3 @@
[pytest]
filterwarnings =
ignore::DeprecationWarning
5 changes: 4 additions & 1 deletion setup.py
@@ -7,7 +7,7 @@
"pytest>=4.0.2",
"numpy>=1.19.4",
"mktestdocs>=0.1.0",
"tqdm>=4.54.0",
"tqdm>=4.54.0"
] + base_packages

util_packages = [
@@ -25,6 +25,8 @@

web_packages = ["httpx>=0.16.1"] + base_packages

ray_packages = ["ray>=1.3.0"] + base_packages

setup(
name="memo",
version="0.1.3",
@@ -34,5 +36,6 @@
"web": web_packages,
"test": test_packages,
"dev": dev_packages,
"ray": ray_packages
},
)
10 changes: 6 additions & 4 deletions tests/test_runner.py
@@ -1,10 +1,12 @@
import pytest
from memo import memlist, Runner, grid
import ray
ray.init()


@pytest.mark.parametrize(
"kw",
[{"backend": "loky"}, {"backend": "threading"}, {"backend": "multiprocessing"}],
[{"backend": "loky"}, {"backend": "threading"}, {"backend": "multiprocessing"}, {"backend": "ray"}],
)
def test_base_multiple_calls(kw):
data = []
@@ -21,7 +23,7 @@ def count_values(n_jobs=-1, **kwargs):

@pytest.mark.parametrize(
"kw",
[{"backend": "loky"}, {"backend": "threading"}, {"backend": "multiprocessing"}],
[{"backend": "loky"}, {"backend": "threading"}, {"backend": "multiprocessing"}, {"backend": "ray"}],
)
def test_keys_included(kw):
data = []
@@ -39,7 +41,7 @@ def count_values(**kwargs):

@pytest.mark.parametrize(
"kw",
[{"backend": "loky"}, {"backend": "threading"}, {"backend": "multiprocessing"}],
[{"backend": "loky"}, {"backend": "threading"}, {"backend": "multiprocessing"}, {"backend": "ray"}],
)
def test_base_args_included(kw):
data = []
@@ -62,7 +64,7 @@ def count_values(a, b, **kwargs):

def test_raises_type_error():
data = []
g = {"a": 3, "b": 4, "c": 5}
g = [{"a": 3, "b": 4, "c": 5}]

with pytest.raises(TypeError):
